Mercurial > repos > thanhlv > humann_split_table
comparison customizemetadata.py @ 0:40a24d7612b8 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
| author | thanhlv |
|---|---|
| date | Mon, 13 Feb 2023 16:17:26 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:40a24d7612b8 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import argparse | |
| 5 import bz2 | |
| 6 import json | |
| 7 import pickle | |
| 8 import re | |
| 9 import sys | |
| 10 from importlib.metadata import version | |
| 11 from pathlib import Path | |
| 12 | |
| 13 from packaging.version import Version | |
| 14 | |
| 15 | |
def load_from_json(json_fp):
    '''
    Read marker metadata from a JSON file.

    :param json_fp: Path to JSON file
    '''
    with open(json_fp, 'r') as json_f:
        metadata = json.load(json_f)

    # JSON cannot encode sets: restore each marker's 'ext' list to a set
    for marker in metadata['markers'].values():
        marker['ext'] = set(marker['ext'])

    # JSON cannot encode tuples: restore list-valued taxonomy entries to tuples
    metadata['taxonomy'] = {
        taxon: tuple(value) if isinstance(value, list) else value
        for taxon, value in metadata['taxonomy'].items()
    }
    return metadata
| 32 | |
| 33 | |
def dump_to_json(data, json_fp):
    '''
    Dump marker metadata to a JSON file.

    Note: mutates *data* in place — each marker's 'ext' set is converted to a
    list so the structure is JSON-serializable.

    :param data: dictionary with marker metadata
    :param json_fp: Path to JSON file
    '''
    for marker in data['markers'].values():
        marker['ext'] = list(marker['ext'])

    with open(json_fp, 'w') as json_f:
        json.dump(data, json_f)
| 45 | |
| 46 | |
def transform_pkl_to_json(pkl_fp, json_fp):
    '''
    Read Pickle file and drop it to a JSON file

    :param pkl_fp: Path to input Pickle file
    :param json_fp: Path to output JSON file
    '''
    # load metadata from Pickle file
    with bz2.BZ2File(pkl_fp, 'r') as pkl_f:
        in_metadata = pickle.load(pkl_f)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        'merged_taxon': {}
    }

    # transform merged_taxon tuple keys to strings (JSON object keys must be
    # strings).
    # BUG FIX: entries were previously assigned to out_metadata[n] — i.e. to
    # the top level of the dict — leaving 'merged_taxon' always empty; they
    # now go under out_metadata['merged_taxon'] as intended
    for k in in_metadata['merged_taxon']:
        n = ' , '.join(k)
        out_metadata['merged_taxon'][n] = in_metadata['merged_taxon'][k]

    # dump metadata to JSON file
    dump_to_json(out_metadata, json_fp)
| 71 | |
| 72 | |
def validate_map_version(infile, file_type):
    '''
    Check conformity of a user-provided pkl file to Metaphlan SGB (>= v4.0).

    Prints a message and exits with status 42 when the mapping file does not
    match the MetaPhlan version found in $PATH.

    :param infile: Path to input Pickle/JSON file
    :param file_type: String defining file type, pkl or JSON. Case-insensitive
    '''
    kind = file_type.lower()
    if kind in ('pkl', 'pickle'):
        # load metadata from Pickle file
        with bz2.BZ2File(infile, 'r') as pkl_f:
            in_metadata = pickle.load(pkl_f)
    elif kind == 'json':
        in_metadata = load_from_json(infile)
    else:
        raise ValueError("Unsupported file type to validate.")

    # Get metaphlan version in $PATH
    metaphlan_version = Version(version('metaphlan'))

    # Ensure the expected number of taxonomy levels separated with "|"s:
    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
    expected_separators = 7 if metaphlan_version >= Version('4') else 6
    for entry in in_metadata['taxonomy'].values():
        if entry[0].count('|') != expected_separators:
            print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version))
            sys.exit(42)

    print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version))
| 103 | |
| 104 | |
def transform_json_to_pkl(json_fp, pkl_fp):
    '''
    Read JSON file and drop it to a Pickle file

    :param json_fp: Path to input JSON file
    :param pkl_fp: Path to output Pickle file
    '''
    # load metadata from JSON file
    in_metadata = load_from_json(json_fp)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        'merged_taxon': {}
    }

    # transform merged_taxon string keys back to tuples (the inverse of the
    # ' , '.join done in transform_pkl_to_json).
    # BUG FIX: the original called ' , '.split(k) — receiver and argument
    # swapped, which splits the literal separator by k and yields an
    # unhashable list — and then stored the entry at the top level of
    # out_metadata instead of under 'merged_taxon'
    for k in in_metadata['merged_taxon']:
        n = tuple(k.split(' , '))
        out_metadata['merged_taxon'][n] = in_metadata['merged_taxon'][k]

    # dump metadata to Pickle file
    with bz2.BZ2File(pkl_fp, 'w') as pkl_f:
        pickle.dump(out_metadata, pkl_f)
| 129 | |
| 130 | |
def add_marker(in_json_fp, out_json_fp, name, m_length, g_length, gca, k_name, k_id, p_name, p_id, c_name, c_id, o_name, o_id, f_name, f_id, g_name, g_id, s_name, s_id, t_name):
    '''
    Add marker to JSON file

    :param in_json_fp: Path to input JSON file
    :param out_json_fp: Path to output JSON file
    :param name: Name of new marker
    :param m_length: Length of new marker
    :param g_length: List with lengths of genomes from which the new marker has been extracted
    :param gca: List with GCA of genomes from which the new marker has been extracted
    :param k_name: List with Name of Kingdom for genomes from which the new marker has been extracted
    :param k_id: List with NCBI id of Kingdom for genomes from which the new marker has been extracted
    :param p_name: List with Name of Phylum for genomes from which the new marker has been extracted
    :param p_id: List with NCBI id of Phylum for genomes from which the new marker has been extracted
    :param c_name: List with Name of Class for genomes from which the new marker has been extracted
    :param c_id: List with NCBI id of Class for genomes from which the new marker has been extracted
    :param o_name: List with Name of Order for genomes from which the new marker has been extracted
    :param o_id: List with NCBI id of Order for genomes from which the new marker has been extracted
    :param f_name: List with Name of Family for genomes from which the new marker has been extracted
    :param f_id: List with NCBI id of Family for genomes from which the new marker has been extracted
    :param g_name: List with Name of Genus for genomes from which the new marker has been extracted
    :param g_id: List with NCBI id of Genus for genomes from which the new marker has been extracted
    :param s_name: List with Name of Species for genomes from which the new marker has been extracted
    :param s_id: List with NCBI id of Species for genomes from which the new marker has been extracted
    :param t_name: List with Name of Strain for genomes from which the new marker has been extracted

    :raises ValueError: if any per-genome list differs in length from g_length
    '''
    metadata = load_from_json(in_json_fp)

    # check that all per-genome lists have the same size (g_length is the
    # reference); same checks and error messages as before, data-driven
    genome_n = len(g_length)
    per_genome_lists = [
        (gca, 'GCA'),
        (k_name, 'Kingdom name'),
        (k_id, 'Kingdom ID'),
        (p_name, 'Phylum name'),
        (p_id, 'Phylum ID'),
        (c_name, 'Class name'),
        (c_id, 'Class ID'),
        (o_name, 'Order name'),
        (o_id, 'Order ID'),
        (f_name, 'Family name'),
        (f_id, 'Family ID'),
        (g_name, 'Genus name'),
        (g_id, 'Genus ID'),
        (s_name, 'Species name'),
        (s_id, 'Species ID'),
        (t_name, 'Strain name'),
    ]
    for lst, label in per_genome_lists:
        if len(lst) != genome_n:
            raise ValueError("Missing/Extra values in %s list" % label)

    # create dictionary to aggregate genome taxonomies and identify marker taxonomy
    levels = ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']
    taxonomy = {level: set() for level in levels}
    level_names = (k_name, p_name, c_name, o_name, f_name, g_name, s_name, t_name)

    # parse genomes
    for i in range(genome_n):
        # add taxonomy of new genome
        g_taxo_names = "k__%s|p__%s|c__%s|o__%s|f__%s|g__%s|s__%s|t__%s" % (
            k_name[i],
            p_name[i],
            c_name[i],
            o_name[i],
            f_name[i],
            g_name[i],
            s_name[i],
            t_name[i]
        )
        # NOTE(review): no strain-level id is appended here (7 id fields for
        # 8 name levels) — presumably matches the v3 taxid layout; confirm
        # against the target MetaPhlan DB version
        g_taxo_ids = "%s|%s|%s|%s|%s|%s|%s" % (
            k_id[i],
            p_id[i],
            c_id[i],
            o_id[i],
            f_id[i],
            g_id[i],
            s_id[i]
        )
        metadata['taxonomy'][g_taxo_names] = (g_taxo_ids, g_length[i])
        # aggregate taxon levels using sets
        for level, names in zip(levels, level_names):
            taxonomy[level].add(names[i])

    # extract clade and taxon of marker
    clade = ''  # last level before taxonomy of genomes diverge
    taxon = ''  # combination of levels before divergence
    for level in levels:
        taxo = list(taxonomy[level])
        if len(taxo) == 1:
            clade = taxo[0]
            # BUG FIX: the level value was interpolated as the whole list
            # (taxo), producing strings like "|k__['Bacteria']"; use taxo[0]
            taxon = "%s|%s__%s" % (taxon, level, taxo[0])

    # add information about the new marker
    metadata['markers'][name] = {
        'clade': clade,
        'ext': set(gca),
        'len': m_length,
        'taxon': taxon
    }

    dump_to_json(metadata, out_json_fp)
| 257 | |
| 258 | |
def format_markers(marker_l):
    '''
    Format markers: strip trailing whitespace and keep only the text before
    the first space on each line.

    :param marker_l: list of markers
    '''
    # str.split(' ')[0] returns the whole string when there is no space,
    # so a single expression covers both branches of the original
    return [raw.rstrip().split(' ')[0] for raw in marker_l]
| 273 | |
| 274 | |
def get_markers(marker_fp):
    '''
    Get markers from a file

    :param marker_fp: Path to file with markers (1 per line)
    '''
    # load raw lines, then normalize them with format_markers
    with open(marker_fp, 'r') as marker_f:
        raw_lines = marker_f.readlines()
    return format_markers(raw_lines)
| 289 | |
| 290 | |
def check_not_found_markers(found_markers, original_markers):
    '''
    Print the markers from original_markers that are missing from
    found_markers (only when the two lists differ in length).

    :param found_markers: list of found markers
    :param original_markers: list of original markers
    '''
    if len(found_markers) == len(original_markers):
        return
    print('markers not found:')
    for missing in (m for m in original_markers if m not in found_markers):
        print('- "%s"' % missing)
| 303 | |
| 304 | |
def prune_taxonomy(in_taxonomy, taxon_s, gca_s):
    '''
    Prune taxonomy to keep only listed taxonomy

    :param in_taxonomy: dictionary with list of taxonomy
    :param taxon_s: set of taxons to keep
    :param gca_s: set of GCA ids to keep
    '''
    out_taxonomy = {}
    kept_taxonomy = set()
    kept_taxons = set()
    kept_gca = set()
    for taxon_str, value in in_taxonomy.items():
        # keep the entry when any requested taxon is a substring of it
        matched = False
        for wanted in taxon_s:
            if wanted in taxon_str:
                matched = True
                out_taxonomy[taxon_str] = value
                kept_taxonomy.add(taxon_str)
                kept_taxons.add(wanted)
                break
        # an entry ending in a GCA accession from gca_s is also kept
        gca_match = re.search(r'GCA_\d+$', taxon_str)
        if gca_match and gca_match[0] in gca_s:
            kept_gca.add(gca_match[0])
            if not matched:
                out_taxonomy[taxon_str] = value
                kept_taxonomy.add(taxon_str)

    print('%s kept taxonomy' % len(kept_taxonomy))
    print('%s / %s taxons not found' % (len(taxon_s) - len(kept_taxons), len(taxon_s)))
    print('%s / %s GCA taxons not found' % (len(gca_s) - len(kept_gca), len(gca_s)))
    return out_taxonomy
| 342 | |
| 343 | |
def remove_markers(in_json_fp, marker_fp, out_json_fp, kept_marker_fp):
    '''
    Remove markers from JSON file

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to remove (1 per line)
    :param out_json_fp: Path to output JSON file
    :param kept_marker_fp: Path to file with kept markers
    '''
    in_metadata = load_from_json(in_json_fp)

    # load markers to remove
    markers_to_remove = set(get_markers(marker_fp))
    print('%s markers to remove' % len(markers_to_remove))

    # merged_taxon is carried over untouched
    out_metadata = {
        'markers': {},
        'taxonomy': {},
        'merged_taxon': in_metadata['merged_taxon']
    }

    # split markers into kept and removed, collecting the taxons/GCAs the
    # kept markers still reference
    removed_markers = []
    kept_markers = []
    taxons_to_keep = set()
    gca_to_keep = set()
    for marker, info in in_metadata['markers'].items():
        if marker in markers_to_remove:
            removed_markers.append(marker)
            continue
        out_metadata['markers'][marker] = info
        kept_markers.append(marker)
        taxons_to_keep.add(info['taxon'])
        gca_to_keep.update(info['ext'])
    print('%s removed markers' % len(removed_markers))

    # report requested markers that were never seen
    check_not_found_markers(removed_markers, markers_to_remove)

    # keep only taxonomy in taxons_to_keep or with GCA in gca_to_keep
    out_metadata['taxonomy'] = prune_taxonomy(in_metadata['taxonomy'], taxons_to_keep, gca_to_keep)

    # save to JSON
    dump_to_json(out_metadata, out_json_fp)

    # write list of kept markers, one per line
    with open(kept_marker_fp, 'w') as kept_marker_f:
        kept_marker_f.writelines("%s\n" % m for m in kept_markers)
| 394 | |
| 395 | |
def keep_markers(in_json_fp, marker_fp, out_json_fp):
    '''
    Keep markers from JSON file, others will be removed

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to keep (1 per line)
    :param out_json_fp: Path to output JSON file
    '''
    in_metadata = load_from_json(in_json_fp)

    # load markers to keep
    markers_to_keep = set(get_markers(marker_fp))
    print('%s markers to keep' % len(markers_to_keep))

    # merged_taxon is carried over untouched
    out_metadata = {
        'markers': {},
        'taxonomy': {},
        'merged_taxon': in_metadata['merged_taxon']
    }

    # retain only requested markers, collecting the taxons/GCAs they reference
    kept_markers = []
    taxons_to_keep = set()
    gca_to_keep = set()
    for marker, info in in_metadata['markers'].items():
        if marker not in markers_to_keep:
            continue
        out_metadata['markers'][marker] = info
        kept_markers.append(marker)
        taxons_to_keep.add(info['taxon'])
        gca_to_keep.update(info['ext'])
    print('%s kept markers' % len(kept_markers))

    # report requested markers that were never seen
    check_not_found_markers(kept_markers, markers_to_keep)

    # keep only taxonomy in taxons_to_keep or with GCA in gca_to_keep
    out_metadata['taxonomy'] = prune_taxonomy(in_metadata['taxonomy'], taxons_to_keep, gca_to_keep)

    # save to JSON
    dump_to_json(out_metadata, out_json_fp)
| 437 | |
| 438 | |
if __name__ == '__main__':
    # Read command line.
    # Each subcommand maps 1:1 to a function defined above; dispatch is done
    # on args.function (set by dest='function').
    parser = argparse.ArgumentParser(description='Customize MetaPhlan database')
    subparsers = parser.add_subparsers(dest='function')
    # transform_pkl_to_json subcommand
    pkl_to_json_parser = subparsers.add_parser('transform_pkl_to_json', help='Transform Pickle to JSON to get marker metadata')
    pkl_to_json_parser.add_argument('--pkl', help="Path to input Pickle file")
    pkl_to_json_parser.add_argument('--json', help="Path to output JSON file")
    # transform_json_to_pkl subcommand
    json_to_pkl_parser = subparsers.add_parser('transform_json_to_pkl', help='Transform JSON to Pickle to push marker metadata')
    json_to_pkl_parser.add_argument('--json', help="Path to input JSON file")
    json_to_pkl_parser.add_argument('--pkl', help="Path to output Pickle file")
    # add_marker subcommand.
    # The taxonomy options below use action="append": repeat each flag once
    # per source genome; add_marker requires all these lists to have the same
    # length. Lengths are passed through as strings (no type=int).
    add_marker_parser = subparsers.add_parser('add_marker', help='Add new marker to JSON file')
    add_marker_parser.add_argument('--in_json', help="Path to input JSON file")
    add_marker_parser.add_argument('--out_json', help="Path to output JSON file")
    add_marker_parser.add_argument('--name', help="Name of new marker")
    add_marker_parser.add_argument('--m_length', help="Length of new marker")
    add_marker_parser.add_argument('--g_length', help="Length of genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--gca', help="GCA of genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--k_name', help="Name of Kingdom for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--k_id', help="NCBI id of Kingdom for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--p_name', help="Name of Phylum for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--p_id', help="NCBI id of Phylum for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--c_name', help="Name of Class for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--c_id', help="NCBI id of Class for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--o_name', help="Name of Order for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--o_id', help="NCBI id of Order for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--f_name', help="Name of Family for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--f_id', help="NCBI id of Family for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--g_name', help="Name of Genus for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--g_id', help="NCBI id of Genus for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--s_name', help="Name of Species for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--s_id', help="NCBI id of Species for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--t_name', help="Name of Strain for genome from which the new marker has been extracted", action="append")
    # remove_markers subcommand
    remove_markers_parser = subparsers.add_parser('remove_markers', help='Remove markers from JSON file')
    remove_markers_parser.add_argument('--in_json', help="Path to input JSON file")
    remove_markers_parser.add_argument('--markers', help="Path to file with markers to remove (1 per line)")
    remove_markers_parser.add_argument('--out_json', help="Path to output JSON file")
    remove_markers_parser.add_argument('--kept_markers', help="Path to file with kept markers")
    # keep_markers subcommand
    keep_markers_parser = subparsers.add_parser('keep_markers', help='Keep markers from JSON file, others will be removed')
    keep_markers_parser.add_argument('--in_json', help="Path to input JSON file")
    keep_markers_parser.add_argument('--markers', help="Path to file with markers to keep (1 per line)")
    keep_markers_parser.add_argument('--out_json', help="Path to output JSON file")

    args = parser.parse_args()

    # Dispatch to the selected subcommand.
    # NOTE: the transform_* paths first validate the mapping file against the
    # MetaPhlan version in $PATH; validate_map_version exits with status 42
    # on incompatibility, so the transform only runs on compatible files.
    if args.function == 'transform_pkl_to_json':
        validate_map_version(Path(args.pkl), 'pkl')
        transform_pkl_to_json(Path(args.pkl), Path(args.json))
    elif args.function == 'transform_json_to_pkl':
        validate_map_version(Path(args.json), 'json')
        transform_json_to_pkl(Path(args.json), Path(args.pkl))
    elif args.function == 'add_marker':
        add_marker(
            args.in_json,
            args.out_json,
            args.name,
            args.m_length,
            args.g_length,
            args.gca,
            args.k_name,
            args.k_id,
            args.p_name,
            args.p_id,
            args.c_name,
            args.c_id,
            args.o_name,
            args.o_id,
            args.f_name,
            args.f_id,
            args.g_name,
            args.g_id,
            args.s_name,
            args.s_id,
            args.t_name)
    elif args.function == 'remove_markers':
        remove_markers(args.in_json, args.markers, args.out_json, args.kept_markers)
    elif args.function == 'keep_markers':
        keep_markers(args.in_json, args.markers, args.out_json)
