Mercurial > repos > iuc > humann_strain_profiler
comparison customizemetadata.py @ 2:18481a7c3676 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
author | iuc |
---|---|
date | Tue, 07 Feb 2023 19:39:15 +0000 |
parents | 0ff86e44895c |
children |
comparison
equal
deleted
inserted
replaced
1:9540860a506f | 2:18481a7c3676 |
---|---|
4 import argparse | 4 import argparse |
5 import bz2 | 5 import bz2 |
6 import json | 6 import json |
7 import pickle | 7 import pickle |
8 import re | 8 import re |
9 import sys | |
10 from importlib.metadata import version | |
9 from pathlib import Path | 11 from pathlib import Path |
12 | |
13 from packaging.version import Version | |
10 | 14 |
11 | 15 |
12 def load_from_json(json_fp): | 16 def load_from_json(json_fp): |
13 ''' | 17 ''' |
14 Read JSON file with marker metadata | 18 Read JSON file with marker metadata |
54 out_metadata = { | 58 out_metadata = { |
55 'markers': in_metadata['markers'], | 59 'markers': in_metadata['markers'], |
56 'taxonomy': in_metadata['taxonomy'], | 60 'taxonomy': in_metadata['taxonomy'], |
57 'merged_taxon': {} | 61 'merged_taxon': {} |
58 } | 62 } |
63 | |
59 # transform merged_taxons tuple keys to string | 64 # transform merged_taxons tuple keys to string |
60 for k in in_metadata['merged_taxon']: | 65 for k in in_metadata['merged_taxon']: |
61 n = ' , '.join(k) | 66 n = ' , '.join(k) |
62 out_metadata[n] = in_metadata['merged_taxon'][k] | 67 out_metadata[n] = in_metadata['merged_taxon'][k] |
63 | 68 |
64 # dump metadata to JSON file | 69 # dump metadata to JSON file |
65 dump_to_json(out_metadata, json_fp) | 70 dump_to_json(out_metadata, json_fp) |
71 | |
72 | |
73 def validate_map_version(infile, file_type): | |
74 ''' | |
75 Check conformity of a user-provided pkl file to Metaphlan SGB (>= v4.0). | |
76 | |
77 :param infile: Path to input Pickle/JSON file | |
78 :param file_type: String defining file type, pkl or JSON. Case-insensitive | |
79 ''' | |
80 file_type = file_type.lower() | |
81 if file_type == 'pkl' or file_type == 'pickle': | |
82 # load metadata from Pickle file | |
83 with bz2.BZ2File(infile, 'r') as pkl_f: | |
84 in_metadata = pickle.load(pkl_f) | |
85 elif file_type == 'json': | |
86 in_metadata = load_from_json(infile) | |
87 else: | |
88 raise ValueError("Unsupported file type to validate.") | |
89 | |
90 # Get metaphlan version in $PATH | |
91 metaphlan_version = Version(version('metaphlan')) | |
92 | |
93 # Ensure that there are 8 taxonomy levels separated with "|"s. | |
94 # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432) | |
95 # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432) | |
96 for k in in_metadata['taxonomy']: | |
97 if (in_metadata['taxonomy'][k][0].count('|') != 7 and metaphlan_version >= Version('4')) or (in_metadata['taxonomy'][k][0].count('|') != 6 and metaphlan_version < Version('4')): | |
98 # raise ValueError("Missing/Extra values in GCA list") | |
99 print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version)) | |
100 sys.exit(42) | |
101 | |
102 print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version)) | |
66 | 103 |
67 | 104 |
68 def transform_json_to_pkl(json_fp, pkl_fp): | 105 def transform_json_to_pkl(json_fp, pkl_fp): |
69 ''' | 106 ''' |
70 Read JSON file and drop it to a Pickle file | 107 Read JSON file and drop it to a Pickle file |
78 out_metadata = { | 115 out_metadata = { |
79 'markers': in_metadata['markers'], | 116 'markers': in_metadata['markers'], |
80 'taxonomy': in_metadata['taxonomy'], | 117 'taxonomy': in_metadata['taxonomy'], |
81 'merged_taxon': {} | 118 'merged_taxon': {} |
82 } | 119 } |
120 | |
83 # transform merged_taxons keys to tuple | 121 # transform merged_taxons keys to tuple |
84 for k in in_metadata['merged_taxon']: | 122 for k in in_metadata['merged_taxon']: |
85 n = ' , '.split(k) | 123 n = ' , '.split(k) |
86 out_metadata[n] = in_metadata['merged_taxon'][k] | 124 out_metadata[n] = in_metadata['merged_taxon'][k] |
87 | 125 |
446 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file") | 484 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file") |
447 | 485 |
448 args = parser.parse_args() | 486 args = parser.parse_args() |
449 | 487 |
450 if args.function == 'transform_pkl_to_json': | 488 if args.function == 'transform_pkl_to_json': |
489 validate_map_version(Path(args.pkl), 'pkl') | |
451 transform_pkl_to_json(Path(args.pkl), Path(args.json)) | 490 transform_pkl_to_json(Path(args.pkl), Path(args.json)) |
452 elif args.function == 'transform_json_to_pkl': | 491 elif args.function == 'transform_json_to_pkl': |
492 validate_map_version(Path(args.json), 'json') | |
453 transform_json_to_pkl(Path(args.json), Path(args.pkl)) | 493 transform_json_to_pkl(Path(args.json), Path(args.pkl)) |
454 elif args.function == 'add_marker': | 494 elif args.function == 'add_marker': |
455 add_marker( | 495 add_marker( |
456 args.in_json, | 496 args.in_json, |
457 args.out_json, | 497 args.out_json, |