Mercurial > repos > thanhlv > customize_metaphlan_database
annotate customizemetadata.py @ 1:b6e5df1237f2 draft default tip
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
| author | thanhlv | 
|---|---|
| date | Mon, 13 Feb 2023 15:37:20 +0000 | 
| parents | c0473c69ac9f | 
| children | 
| rev | line source | 
|---|---|
| 0 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 2 # -*- coding: utf-8 -*- | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 3 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 4 import argparse | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 5 import bz2 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 6 import json | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 7 import pickle | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 8 import re | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 9 from pathlib import Path | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 10 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 11 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def load_from_json(json_fp):
    '''
    Read JSON file with marker metadata

    JSON has no set or tuple type, so two containers are restored after
    parsing: the 'ext' field of every marker is turned into a set, and
    every taxonomy value that was parsed as a list is turned into a tuple.
    Taxonomy values of any other type are kept as parsed.

    :param json_fp: Path to JSON file
    :return: Dictionary with the parsed metadata
    '''
    # JSON text is UTF-8; do not depend on the platform's default encoding
    with open(json_fp, 'r', encoding='utf-8') as json_f:
        data = json.load(json_f)

    for marker in data['markers'].values():
        marker['ext'] = set(marker['ext'])

    taxonomy = data['taxonomy']
    # only existing keys are reassigned, so iterating over items() is safe
    for taxon, value in taxonomy.items():
        if isinstance(value, list):
            taxonomy[taxon] = tuple(value)
    return data
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 28 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 29 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def dump_to_json(data, json_fp):
    '''
    Dump marker metadata to JSON file

    Sets (such as the 'ext' field of each marker) have no JSON equivalent
    and are written as JSON arrays. The conversion is done while
    serializing, so ``data`` itself is not modified and can still be used
    by the caller afterwards.

    :param data: Dictionary with the metadata to write
    :param json_fp: Path to JSON file
    :raises TypeError: If ``data`` holds a value JSON cannot represent
    '''
    def encode_set(obj):
        # called by json only for objects it cannot serialize natively
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        raise TypeError(
            'Object of type %s is not JSON serializable' % type(obj).__name__)

    with open(json_fp, 'w', encoding='utf-8') as json_f:
        json.dump(data, json_f, default=encode_set)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 41 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 42 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
# Separator used to flatten the tuple keys of 'merged_taxon' into the string
# keys required by JSON; both transform functions must agree on it.
_MERGED_TAXON_KEY_SEP = ' , '


def transform_pkl_to_json(pkl_fp, json_fp):
    '''
    Read Pickle file and drop it to a JSON file

    The tuple keys of 'merged_taxon' are joined into single strings (JSON
    object keys must be strings) and stored under the 'merged_taxon' entry
    of the output, so that transform_json_to_pkl can restore them.

    :param pkl_fp: Path to input Pickle file (bz2 compressed)
    :param json_fp: Path to output JSON file
    '''
    # load metadata from Pickle file
    # NOTE: unpickling can execute arbitrary code; only use trusted databases
    with bz2.BZ2File(pkl_fp, 'r') as pkl_f:
        in_metadata = pickle.load(pkl_f)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        # transform merged_taxons tuple keys to string
        'merged_taxon': {
            _MERGED_TAXON_KEY_SEP.join(key): value
            for key, value in in_metadata['merged_taxon'].items()
        }
    }

    # dump metadata to JSON file
    dump_to_json(out_metadata, json_fp)


def transform_json_to_pkl(json_fp, pkl_fp):
    '''
    Read JSON file and drop it to a Pickle file

    The string keys of 'merged_taxon' are split back into tuples, and
    taxonomy entries with 7 taxid levels get a trailing '|' so that they
    have 8 levels.

    :param json_fp: Path to input JSON file
    :param pkl_fp: Path to output Pickle file (bz2 compressed)
    '''
    # load metadata from JSON file
    in_metadata = load_from_json(json_fp)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        # transform merged_taxons keys to tuple (a list is not hashable)
        'merged_taxon': {
            tuple(key.split(_MERGED_TAXON_KEY_SEP)): value
            for key, value in in_metadata['merged_taxon'].items()
        }
    }

    # Ensure that there are 8 taxonomy levels (for compatibility between Metaphlan v3 and v4)
    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
    taxonomy = out_metadata['taxonomy']
    for taxon, entry in taxonomy.items():
        # load_from_json leaves non-list taxonomy values as they are, so
        # only the (taxids, genome length) tuples are considered here
        if isinstance(entry, tuple) and entry[0].count('|') == 6:
            taxonomy[taxon] = (entry[0] + '|', entry[1])

    # dump metadata to Pickle file
    with bz2.BZ2File(pkl_fp, 'w') as pkl_f:
        pickle.dump(out_metadata, pkl_f)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 99 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 100 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 101 def add_marker(in_json_fp, out_json_fp, name, m_length, g_length, gca, k_name, k_id, p_name, p_id, c_name, c_id, o_name, o_id, f_name, f_id, g_name, g_id, s_name, s_id, t_name): | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 102 ''' | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 103 Add marker to JSON file | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 104 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 105 :param in_json_fp: Path to input JSON file | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 106 :param out_json_fp: Path to output JSON file | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 107 :param name: Name of new marker | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 108 :param m_length: Length of new marker | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 109 :param g_length: List with lengths of genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 110 :param gca: List with GCA of genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 111 :param k_name: List with Name of Kingdom for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 112 :param k_id: List with NCBI id of Kingdom for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 113 :param p_name: List with Name of Phylum for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 114 :param p_id: List with NCBI id of Phylum for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 115 :param c_name: List with Name of Class for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 116 :param c_id: List with NCBI id of Class for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 117 :param o_name: List with Name of Order for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 118 :param o_id: List with NCBI id of Order for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 119 :param f_name: List with Name of Family for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 120 :param f_id: List with NCBI id of Family for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 121 :param g_name: List with Name of Genus for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 122 :param g_id: List with NCBI id of Genus for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 123 :param s_name: List with Name of Species for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 124 :param s_id: List with NCBI id of Species for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 125 :param t_name: List with Name of Strain for genomes from which the new marker has been extracted | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 126 ''' | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 127 metadata = load_from_json(in_json_fp) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 128 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 129 # check that all lists have same size | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 130 genome_n = len(g_length) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 131 if len(gca) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 132 raise ValueError("Missing/Extra values in GCA list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 133 if len(k_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 134 raise ValueError("Missing/Extra values in Kingdom name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 135 if len(k_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 136 raise ValueError("Missing/Extra values in Kingdom ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 137 if len(p_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 138 raise ValueError("Missing/Extra values in Phylum name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 139 if len(p_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 140 raise ValueError("Missing/Extra values in Phylum ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 141 if len(c_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 142 raise ValueError("Missing/Extra values in Class name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 143 if len(c_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 144 raise ValueError("Missing/Extra values in Class ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 145 if len(o_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 146 raise ValueError("Missing/Extra values in Order name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 147 if len(o_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 148 raise ValueError("Missing/Extra values in Order ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 149 if len(f_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 150 raise ValueError("Missing/Extra values in Family name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 151 if len(f_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 152 raise ValueError("Missing/Extra values in Family ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 153 if len(g_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 154 raise ValueError("Missing/Extra values in Genus name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 155 if len(g_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 156 raise ValueError("Missing/Extra values in Genus ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 157 if len(s_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 158 raise ValueError("Missing/Extra values in Species name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 159 if len(s_id) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 160 raise ValueError("Missing/Extra values in Species ID list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 161 if len(t_name) != genome_n: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 162 raise ValueError("Missing/Extra values in Strain name list") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 163 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 164 # create dictionary to aggregate genome taxonomies and identify marker taxonomy | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 165 taxonomy = { | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 166 'k': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 167 'p': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 168 'c': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 169 'o': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 170 'f': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 171 'g': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 172 's': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 173 't': set(), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 174 } | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 175 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 176 # parse genomes | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 177 for i in range(genome_n): | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 178 # add taxonomy of new genome | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 179 g_taxo_names = "k__%s|p__%s|c__%s|o__%s|f__%s|g__%s|s__%s|t__%s" % ( | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 180 k_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 181 p_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 182 c_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 183 o_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 184 f_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 185 g_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 186 s_name[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 187 t_name[i] | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 188 ) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 189 g_taxo_ids = "%s|%s|%s|%s|%s|%s|%s" % ( | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 190 k_id[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 191 p_id[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 192 c_id[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 193 o_id[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 194 f_id[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 195 g_id[i], | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 196 s_id[i] | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 197 ) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 198 metadata['taxonomy'][g_taxo_names] = (g_taxo_ids, g_length[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 199 # aggregate taxon levels using sets | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 200 taxonomy['k'].add(k_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 201 taxonomy['p'].add(p_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 202 taxonomy['c'].add(c_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 203 taxonomy['o'].add(o_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 204 taxonomy['f'].add(f_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 205 taxonomy['g'].add(g_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 206 taxonomy['s'].add(s_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 207 taxonomy['t'].add(t_name[i]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 208 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 209 # extract clade and taxon of marker | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 210 clade = '' # last level before taxomy of genomes diverge | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 211 taxon = '' # combination of levels before divergence | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 212 for level in ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 213 taxo = list(taxonomy[level]) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 214 if len(taxo) == 1: | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 215 clade = taxo[0] | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 216 taxon = "%s|%s__%s" % (taxon, level, taxo) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 217 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 218 # add information about the new marker | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 219 metadata['markers'][name] = { | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 220 'clade': clade, | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 221 'ext': set(gca), | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 222 'len': m_length, | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 223 'taxon': taxon | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 224 } | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 225 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 226 dump_to_json(metadata, out_json_fp) | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 227 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 228 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def format_markers(marker_l):
    '''
    Format markers

    Reduce every entry to its first whitespace-delimited token, so that any
    description following the marker name is dropped. Leading whitespace and
    tab separators are handled the same way as single spaces. An entry that
    is empty or contains only whitespace is returned as an empty string, so
    the output always has one element per input entry.

    :param marker_l: list of markers
    :return: list of marker names, in the same order as marker_l
    '''
    markers = []
    for m in marker_l:
        # split() without a separator skips leading/trailing whitespace and
        # splits on any whitespace run; it returns [] for a blank entry
        tokens = m.split(None, 1)
        markers.append(tokens[0] if tokens else '')
    return markers
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 243 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 244 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def get_markers(marker_fp):
    '''
    Read the markers listed in a text file

    :param marker_fp: Path to file with markers (1 per line)
    :return: the lines of the file after processing by format_markers
    '''
    # read every line first so the file is closed before formatting
    with open(marker_fp, 'r') as handle:
        raw_lines = list(handle)
    return format_markers(raw_lines)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 259 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 260 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def check_not_found_markers(found_markers, original_markers):
    '''
    Check list of markers

    When the two collections differ in length, print a header followed by
    every element of original_markers that is absent from found_markers.
    Nothing is printed when the lengths are equal.

    :param found_markers: list of found markers
    :param original_markers: list of original markers
    '''
    if len(found_markers) == len(original_markers):
        return
    # build the lookup once: membership tests on a list are linear, which
    # makes the report quadratic for large marker lists
    found = set(found_markers)
    print('markers not found:')
    for m in original_markers:
        if m not in found:
            print('- "%s"' % m)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 273 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 274 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def prune_taxonomy(in_taxonomy, taxon_s, gca_s):
    '''
    Prune taxonomy to keep only listed taxonomy

    An entry is kept when its key contains one of the strings of taxon_s,
    or when its key ends with a "GCA_<digits>" identifier that belongs to
    gca_s. Three summary lines (kept entries, taxons not found, GCA ids not
    found) are printed before returning.

    :param in_taxonomy: dictionary with list of taxonomy
    :param taxon_s: set of taxons to keep
    :param gca_s: set of GCA ids to keep
    :return: dictionary restricted to the kept entries of in_taxonomy
    '''
    pruned = {}
    matched_taxons = set()
    matched_gca = set()
    for lineage, value in in_taxonomy.items():
        # first requested taxon found as a substring of the key, if any
        hit = next((wanted for wanted in taxon_s if wanted in lineage), None)
        if hit is not None:
            pruned[lineage] = value
            matched_taxons.add(hit)
        # look for a GCA identifier at the very end of the key
        found = re.search(r'GCA_\d+$', lineage)
        if found is None:
            continue
        gca = found.group(0)
        if gca not in gca_s:
            continue
        matched_gca.add(gca)
        if hit is None:
            pruned[lineage] = value

    # keys of in_taxonomy are unique, so len(pruned) counts the kept entries
    print('%s kept taxonomy' % len(pruned))
    print('%s / %s taxons not found' % (len(taxon_s) - len(matched_taxons), len(taxon_s)))
    print('%s / %s GCA taxons not found' % (len(gca_s) - len(matched_gca), len(gca_s)))
    return pruned
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 312 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 313 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def remove_markers(in_json_fp, marker_fp, out_json_fp, kept_marker_fp):
    '''
    Write a copy of the JSON metadata without the listed markers

    Every marker whose name is not listed in the marker file is copied to the
    output, the taxonomy is pruned with the taxons and GCAs referenced by the
    copied markers, and the names of the copied markers are written to a
    separate file (1 per line).

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to remove (1 per line)
    :param out_json_fp: Path to output JSON file
    :param kept_marker_fp: Path to file with kept markers
    '''
    metadata = load_from_json(in_json_fp)

    # names of the markers that must disappear from the output
    unwanted = set(get_markers(marker_fp))
    print('%s markers to remove' % len(unwanted))

    # merged_taxon is carried over unchanged
    merged_taxon = metadata['merged_taxon']

    # split the input markers into surviving and dropped ones, collecting
    # the taxons and GCAs the surviving markers refer to
    surviving = {}
    dropped = []
    taxons_in_use = set()
    gca_in_use = set()
    for name, info in metadata['markers'].items():
        if name in unwanted:
            dropped.append(name)
            continue
        surviving[name] = info
        taxons_in_use.add(info['taxon'])
        gca_in_use.update(info['ext'])
    print('%s removed markers' % len(dropped))

    # report requested markers that were not present in the input
    check_not_found_markers(dropped, unwanted)

    # assemble the output, keeping only the taxonomy entries still referenced
    result = {
        'markers': surviving,
        'taxonomy': prune_taxonomy(metadata['taxonomy'], taxons_in_use, gca_in_use),
        'merged_taxon': merged_taxon
    }
    dump_to_json(result, out_json_fp)

    # one surviving marker name per line, in input order
    with open(kept_marker_fp, 'w') as handle:
        handle.writelines("%s\n" % name for name in surviving)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 364 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 365 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
def keep_markers(in_json_fp, marker_fp, out_json_fp):
    '''
    Write a copy of the JSON metadata restricted to the listed markers

    Only markers whose name is listed in the marker file are copied to the
    output; the taxonomy is pruned with the taxons and GCAs referenced by
    the copied markers.

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to keep (1 per line)
    :param out_json_fp: Path to output JSON file
    '''
    metadata = load_from_json(in_json_fp)

    # names of the markers that are allowed to stay
    wanted = set(get_markers(marker_fp))
    print('%s markers to keep' % len(wanted))

    # merged_taxon is carried over unchanged
    merged_taxon = metadata['merged_taxon']

    # select the requested markers, collecting the taxons and GCAs they use
    selected = {}
    selected_names = []
    taxons_in_use = set()
    gca_in_use = set()
    for name, info in metadata['markers'].items():
        if name not in wanted:
            continue
        selected[name] = info
        selected_names.append(name)
        taxons_in_use.add(info['taxon'])
        gca_in_use.update(info['ext'])
    print('%s kept markers' % len(selected_names))

    # report requested markers that were not present in the input
    check_not_found_markers(selected_names, wanted)

    # assemble the output, keeping only the taxonomy entries still referenced
    result = {
        'markers': selected,
        'taxonomy': prune_taxonomy(metadata['taxonomy'], taxons_in_use, gca_in_use),
        'merged_taxon': merged_taxon
    }
    dump_to_json(result, out_json_fp)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 407 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 408 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 409 if __name__ == '__main__': | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 410 # Read command line | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 411 parser = argparse.ArgumentParser(description='Customize MetaPhlan database') | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 412 subparsers = parser.add_subparsers(dest='function') | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 413 # transform_pkl_to_json subcommand | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 414 pkl_to_json_parser = subparsers.add_parser('transform_pkl_to_json', help='Transform Pickle to JSON to get marker metadata') | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 415 pkl_to_json_parser.add_argument('--pkl', help="Path to input Pickle file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 416 pkl_to_json_parser.add_argument('--json', help="Path to output JSON file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 417 # transform_json_to_pkl subcommand | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 418 json_to_pkl_parser = subparsers.add_parser('transform_json_to_pkl', help='Transform JSON to Pickle to push marker metadata') | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 419 json_to_pkl_parser.add_argument('--json', help="Path to input JSON file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 420 json_to_pkl_parser.add_argument('--pkl', help="Path to output Pickle file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 421 # add_marker subcommand | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 422 add_marker_parser = subparsers.add_parser('add_marker', help='Add new marker to JSON file') | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 423 add_marker_parser.add_argument('--in_json', help="Path to input JSON file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 424 add_marker_parser.add_argument('--out_json', help="Path to output JSON file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 425 add_marker_parser.add_argument('--name', help="Name of new marker") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 426 add_marker_parser.add_argument('--m_length', help="Length of new marker") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 427 add_marker_parser.add_argument('--g_length', help="Length of genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 428 add_marker_parser.add_argument('--gca', help="GCA of genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 429 add_marker_parser.add_argument('--k_name', help="Name of Kingdom for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 430 add_marker_parser.add_argument('--k_id', help="NCBI id of Kingdom for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 431 add_marker_parser.add_argument('--p_name', help="Name of Phylum for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 432 add_marker_parser.add_argument('--p_id', help="NCBI id of Phylum for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 433 add_marker_parser.add_argument('--c_name', help="Name of Class for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 434 add_marker_parser.add_argument('--c_id', help="NCBI id of Class for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 435 add_marker_parser.add_argument('--o_name', help="Name of Order for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 436 add_marker_parser.add_argument('--o_id', help="NCBI id of Order for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 437 add_marker_parser.add_argument('--f_name', help="Name of Family for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 438 add_marker_parser.add_argument('--f_id', help="NCBI id of Family for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 439 add_marker_parser.add_argument('--g_name', help="Name of Genus for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 440 add_marker_parser.add_argument('--g_id', help="NCBI id of Genus for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 441 add_marker_parser.add_argument('--s_name', help="Name of Species for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 442 add_marker_parser.add_argument('--s_id', help="NCBI id of Species for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 443 add_marker_parser.add_argument('--t_name', help="Name of Strain for genome from which the new marker has been extracted", action="append") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 444 # remove_markers subcommand | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 445 remove_markers_parser = subparsers.add_parser('remove_markers', help='Remove markers from JSON file') | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 446 remove_markers_parser.add_argument('--in_json', help="Path to input JSON file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 447 remove_markers_parser.add_argument('--markers', help="Path to file with markers to remove (1 per line)") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 448 remove_markers_parser.add_argument('--out_json', help="Path to output JSON file") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 449 remove_markers_parser.add_argument('--kept_markers', help="Path to file with kept markers") | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
# keep_markers subcommand
# Register the subcommand first, then attach its options from a table so each
# flag sits next to its help text.
keep_markers_parser = subparsers.add_parser(
    'keep_markers',
    help='Keep markers from JSON file, others will be removed',
)
for _flag, _help_text in (
    ('--in_json', "Path to input JSON file"),
    ('--markers', "Path to file with markers to keep (1 per line)"),
    ('--out_json', "Path to output JSON file"),
):
    keep_markers_parser.add_argument(_flag, help=_help_text)
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
changeset | 455 | 
| 
c0473c69ac9f
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
 thanhlv parents: diff
args = parser.parse_args()

# Route the parsed arguments to the function that implements the selected
# subcommand; args.function holds the subcommand name.
if args.function == 'transform_pkl_to_json':
    transform_pkl_to_json(Path(args.pkl), Path(args.json))
elif args.function == 'transform_json_to_pkl':
    transform_json_to_pkl(Path(args.json), Path(args.pkl))
elif args.function == 'add_marker':
    # Everything is forwarded positionally, so this order must stay in sync
    # with the add_marker() signature.
    add_marker(
        args.in_json,
        args.out_json,
        args.name,
        args.m_length,
        args.g_length,
        args.gca,
        # name/id pairs, one pair per option prefix
        args.k_name, args.k_id,
        args.p_name, args.p_id,
        args.c_name, args.c_id,
        args.o_name, args.o_id,
        args.f_name, args.f_id,
        args.g_name, args.g_id,
        args.s_name, args.s_id,
        args.t_name,
    )
elif args.function == 'remove_markers':
    remove_markers(args.in_json, args.markers, args.out_json, args.kept_markers)
elif args.function == 'keep_markers':
    keep_markers(args.in_json, args.markers, args.out_json)
else:
    # Reached when the script is run without a subcommand (args.function is
    # then None) or when a registered subcommand has no branch above.
    # Previously this fell through and the script exited successfully without
    # doing anything; parser.error() prints the usage message and exits with
    # status 2 instead.
    parser.error(
        "missing or unsupported subcommand: {!r}".format(args.function)
    )
