Mercurial > repos > azomics > clustergrammer_flow
diff clustergrammerIPG.py @ 0:fee56ee2f7ac draft
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/clustergrammer_flow commit b11dfcf10d287c1da91ffb1d5d0148c7f8f61356"
author | azomics |
---|---|
date | Fri, 31 Jul 2020 19:06:45 -0400 |
parents | |
children | c90127c2a1ae |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clustergrammerIPG.py Fri Jul 31 19:06:45 2020 -0400 @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +###################################################################### +# Copyright (c) 2016 Northrop Grumman. +# All rights reserved. +###################################################################### + +# version 1.1 -- August 2017 +# added checks for consistency between input files +# and upper limit on nb of cluster to look at +# version 1.2 -- June 2018 +# added clustergrammer clustering parameters and normalization options + + +from __future__ import print_function +import sys +import os + +from jinja2 import Environment, FileSystemLoader +import pandas as pd +from clustergrammer import Network + + +def is_integer(s): + try: + int(s) + return True + except ValueError: + return False + + +def get_indices(input_text): + output_list = [] + default_value = ['i.e.:1,2,5', 'i.e.:3,6,7'] + if input_text in default_value: + sys.exit(2) + else: + tmp_col = input_text.split(",") + if len(tmp_col) == 1: + if not tmp_col[0].strip(): + sys.exit(2) + elif not is_integer(tmp_col[0].strip()): + sys.exit(3) + else: + output_list.append(int(tmp_col[0].strip()) - 1) + else: + for c in range(0, len(tmp_col)): + if not is_integer(tmp_col[c].strip()): + sys.exit(3) + else: + output_list.append(int(tmp_col[c].strip()) - 1) + return(output_list) + + +def prepare_heatmap(matrix_input, html_file, html_dir, tools_dir, categories, distance, linkage): + # prepare directory and html + os.mkdir(html_dir) + + env = Environment(loader=FileSystemLoader(tools_dir + "/templates")) + template = env.get_template("clustergrammer.template") + overview = template.render() + with open(html_file, "w") as outf: + outf.write(overview) + + json_output = html_dir + "/mult_view.json" + + net = Network() + net.load_file(matrix_input) + if (categories['row']): + net.add_cats('row', categories['row']) + if (categories['col']): + net.add_cats('col', categories['col']) + net.cluster(dist_type=distance, linkage_type=linkage) + net.write_json_to_file('viz', json_output) + + +if __name__ == "__main__": + + args = sys.argv + categories = { + 'row': [], + 'col': [] + } + norm = {} + + if (len(args) > 7): + df = pd.read_table(args[1]) + names = { + 'row': df.iloc[:, 0], + 'col': df.columns[1:] + } + + tmp_string = "-=-".join(args[8:]) + print (tmp_string + "\n") + # get categories + cats = tmp_string.split("-=-new_cat-=-") + for cat in cats: + tmp_cat = cat.split("-=-") + group = { + "title": tmp_cat[1], + "cats": {} + } + stg_groups = "--".join(tmp_cat[2:]) + groups = stg_groups.split("--new_label--") + cat_indices = [] + for g in groups: + print(g + "\n") + elem = g.split("--") + index_list = get_indices(elem[1]) + index_names = [] + for i in index_list: + if i in cat_indices: + sys.exit(4) + index_names.append(str(names[tmp_cat[0]][i])) + cat_indices = cat_indices + index_list + print(index_names, elem[0], sep="\t") + group["cats"][elem[0]] = index_names + categories[tmp_cat[0]].append(group) + print(categories) + + + prepare_heatmap(args[1], args[2], args[3], args[4], categories, args[5], args[6])