Mercurial > repos > iuc > humann_split_table
comparison customizemetadata.py @ 2:44cacbe09bfc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
| author | iuc |
|---|---|
| date | Tue, 07 Feb 2023 19:40:08 +0000 |
| parents | 506d4a349aa1 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:f00f2c835ac2 | 2:44cacbe09bfc |
|---|---|
| 4 import argparse | 4 import argparse |
| 5 import bz2 | 5 import bz2 |
| 6 import json | 6 import json |
| 7 import pickle | 7 import pickle |
| 8 import re | 8 import re |
| 9 import sys | |
| 10 from importlib.metadata import version | |
| 9 from pathlib import Path | 11 from pathlib import Path |
| 12 | |
| 13 from packaging.version import Version | |
| 10 | 14 |
| 11 | 15 |
| 12 def load_from_json(json_fp): | 16 def load_from_json(json_fp): |
| 13 ''' | 17 ''' |
| 14 Read JSON file with marker metadata | 18 Read JSON file with marker metadata |
| 54 out_metadata = { | 58 out_metadata = { |
| 55 'markers': in_metadata['markers'], | 59 'markers': in_metadata['markers'], |
| 56 'taxonomy': in_metadata['taxonomy'], | 60 'taxonomy': in_metadata['taxonomy'], |
| 57 'merged_taxon': {} | 61 'merged_taxon': {} |
| 58 } | 62 } |
| 63 | |
| 59 # transform merged_taxons tuple keys to string | 64 # transform merged_taxons tuple keys to string |
| 60 for k in in_metadata['merged_taxon']: | 65 for k in in_metadata['merged_taxon']: |
| 61 n = ' , '.join(k) | 66 n = ' , '.join(k) |
| 62 out_metadata[n] = in_metadata['merged_taxon'][k] | 67 out_metadata[n] = in_metadata['merged_taxon'][k] |
| 63 | 68 |
| 64 # dump metadata to JSON file | 69 # dump metadata to JSON file |
| 65 dump_to_json(out_metadata, json_fp) | 70 dump_to_json(out_metadata, json_fp) |
| 71 | |
| 72 | |
| 73 def validate_map_version(infile, file_type): | |
| 74 ''' | |
| 75 Check conformity of a user-provided pkl file to Metaphlan SGB (>= v4.0). | |
| 76 | |
| 77 :param infile: Path to input Pickle/JSON file | |
| 78 :param file_type: String defining file type, pkl or JSON. Case-insensitive | |
| 79 ''' | |
| 80 file_type = file_type.lower() | |
| 81 if file_type == 'pkl' or file_type == 'pickle': | |
| 82 # load metadata from Pickle file | |
| 83 with bz2.BZ2File(infile, 'r') as pkl_f: | |
| 84 in_metadata = pickle.load(pkl_f) | |
| 85 elif file_type == 'json': | |
| 86 in_metadata = load_from_json(infile) | |
| 87 else: | |
| 88 raise ValueError("Unsupported file type to validate.") | |
| 89 | |
| 90 # Get metaphlan version in $PATH | |
| 91 metaphlan_version = Version(version('metaphlan')) | |
| 92 | |
| 93 # Ensure that there are 8 taxonomy levels separated with "|"s. | |
| 94 # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432) | |
| 95 # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432) | |
| 96 for k in in_metadata['taxonomy']: | |
| 97 if (in_metadata['taxonomy'][k][0].count('|') != 7 and metaphlan_version >= Version('4')) or (in_metadata['taxonomy'][k][0].count('|') != 6 and metaphlan_version < Version('4')): | |
| 98 # raise ValueError("Missing/Extra values in GCA list") | |
| 99 print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version)) | |
| 100 sys.exit(42) | |
| 101 | |
| 102 print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version)) | |
| 66 | 103 |
| 67 | 104 |
| 68 def transform_json_to_pkl(json_fp, pkl_fp): | 105 def transform_json_to_pkl(json_fp, pkl_fp): |
| 69 ''' | 106 ''' |
| 70 Read JSON file and drop it to a Pickle file | 107 Read JSON file and drop it to a Pickle file |
| 78 out_metadata = { | 115 out_metadata = { |
| 79 'markers': in_metadata['markers'], | 116 'markers': in_metadata['markers'], |
| 80 'taxonomy': in_metadata['taxonomy'], | 117 'taxonomy': in_metadata['taxonomy'], |
| 81 'merged_taxon': {} | 118 'merged_taxon': {} |
| 82 } | 119 } |
| 120 | |
| 83 # transform merged_taxons keys to tuple | 121 # transform merged_taxons keys to tuple |
| 84 for k in in_metadata['merged_taxon']: | 122 for k in in_metadata['merged_taxon']: |
| 85 n = ' , '.split(k) | 123 n = ' , '.split(k) |
| 86 out_metadata[n] = in_metadata['merged_taxon'][k] | 124 out_metadata[n] = in_metadata['merged_taxon'][k] |
| 87 | 125 |
| 446 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file") | 484 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file") |
| 447 | 485 |
| 448 args = parser.parse_args() | 486 args = parser.parse_args() |
| 449 | 487 |
| 450 if args.function == 'transform_pkl_to_json': | 488 if args.function == 'transform_pkl_to_json': |
| 489 validate_map_version(Path(args.pkl), 'pkl') | |
| 451 transform_pkl_to_json(Path(args.pkl), Path(args.json)) | 490 transform_pkl_to_json(Path(args.pkl), Path(args.json)) |
| 452 elif args.function == 'transform_json_to_pkl': | 491 elif args.function == 'transform_json_to_pkl': |
| 492 validate_map_version(Path(args.json), 'json') | |
| 453 transform_json_to_pkl(Path(args.json), Path(args.pkl)) | 493 transform_json_to_pkl(Path(args.json), Path(args.pkl)) |
| 454 elif args.function == 'add_marker': | 494 elif args.function == 'add_marker': |
| 455 add_marker( | 495 add_marker( |
| 456 args.in_json, | 496 args.in_json, |
| 457 args.out_json, | 497 args.out_json, |
