comparison clustergrammerIPG.py @ 0:fee56ee2f7ac draft

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/clustergrammer_flow commit b11dfcf10d287c1da91ffb1d5d0148c7f8f61356"
author azomics
date Fri, 31 Jul 2020 19:06:45 -0400
parents
children c90127c2a1ae
comparison
equal deleted inserted replaced
-1:000000000000 0:fee56ee2f7ac
1 #!/usr/bin/env python
2
3 ######################################################################
4 # Copyright (c) 2016 Northrop Grumman.
5 # All rights reserved.
6 ######################################################################
7
8 # version 1.1 -- August 2017
9 # added checks for consistency between input files
10 # and upper limit on the number of clusters to look at
11 # version 1.2 -- June 2018
12 # added clustergrammer clustering parameters and normalization options
13
14
15 from __future__ import print_function
16 import sys
17 import os
18
19 from jinja2 import Environment, FileSystemLoader
20 import pandas as pd
21 from clustergrammer import Network
22
23
def is_integer(s):
    """Tell whether ``int(s)`` succeeds.

    Returns True when the conversion works and False when it raises
    ValueError; any other exception propagates to the caller.
    """
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
30
31
def get_indices(input_text):
    """Convert a comma-separated list of 1-based indices to 0-based ints.

    Whitespace around each entry is ignored.

    Args:
        input_text: text such as ``"1,2,5"``.

    Returns:
        A list of ints, each one less than the corresponding entry.

    Exits (via ``sys.exit``) instead of raising on invalid input:
        2 -- the text is one of the form's placeholder examples, or it is
             empty / whitespace only;
        3 -- an entry is not an integer, or is smaller than 1.
    """
    placeholders = ('i.e.:1,2,5', 'i.e.:3,6,7')
    if input_text in placeholders:
        sys.exit(2)

    tokens = [token.strip() for token in input_text.split(",")]
    if tokens == [""]:
        # Nothing was entered at all.
        sys.exit(2)

    output_list = []
    for token in tokens:
        try:
            value = int(token)
        except ValueError:
            sys.exit(3)
        # Indices are 1-based on input: 0 or a negative number would turn
        # into a negative 0-based index and silently select from the end.
        if value < 1:
            sys.exit(3)
        output_list.append(value - 1)
    return output_list
53
54
def prepare_heatmap(matrix_input, html_file, html_dir, tools_dir, categories, distance, linkage):
    """Write the HTML page and the clustered JSON for one heatmap.

    Args:
        matrix_input: path of the matrix file handed to ``Network.load_file``.
        html_file: path of the HTML page to write.
        html_dir: directory to create; receives ``mult_view.json``.
        tools_dir: directory whose ``templates`` sub-directory holds
            ``clustergrammer.template``.
        categories: dict with ``'row'`` and ``'col'`` entries; each non-empty
            entry is passed to ``Network.add_cats`` for that axis.
        distance: forwarded to ``Network.cluster`` as ``dist_type``.
        linkage: forwarded to ``Network.cluster`` as ``linkage_type``.
    """
    # Output directory first: os.mkdir raises if it already exists.
    os.mkdir(html_dir)

    # The template is rendered without any variables and written as-is.
    template_dir = tools_dir + "/templates"
    jinja_env = Environment(loader=FileSystemLoader(template_dir))
    page = jinja_env.get_template("clustergrammer.template").render()
    with open(html_file, "w") as outf:
        outf.write(page)

    network = Network()
    network.load_file(matrix_input)
    for axis in ('row', 'col'):
        axis_categories = categories[axis]
        if axis_categories:
            network.add_cats(axis, axis_categories)
    network.cluster(dist_type=distance, linkage_type=linkage)
    network.write_json_to_file('viz', html_dir + "/mult_view.json")
75
76
if __name__ == "__main__":
    # Positional arguments, as consumed below:
    #   1   input matrix, read as a tab-separated table
    #   2   output HTML file
    #   3   output directory (created by prepare_heatmap)
    #   4   tool directory containing templates/
    #   5   distance type
    #   6   linkage type
    #   7   not read by this script
    #   8+  optional category description tokens
    args = sys.argv
    if len(args) < 7:
        sys.exit("usage: clustergrammerIPG.py <matrix> <html_file> <html_dir> "
                 "<tools_dir> <distance> <linkage> [<arg7> <category tokens>...]")

    categories = {
        'row': [],
        'col': []
    }

    # Category tokens start at args[8]; requiring at least one of them
    # avoids an IndexError when parsing an empty description.
    if len(args) > 8:
        df = pd.read_table(args[1])
        # The first column is used as row names, the remaining headers as
        # column names.
        names = {
            'row': df.iloc[:, 0],
            'col': df.columns[1:]
        }

        # Tokens are re-joined with "-=-". A "new_cat" token separates
        # categories; each category is: axis ('row' or 'col'), title, then
        # (label, indices) pairs separated by "new_label" tokens.
        tmp_string = "-=-".join(args[8:])
        print(tmp_string + "\n")
        for cat in tmp_string.split("-=-new_cat-=-"):
            tmp_cat = cat.split("-=-")
            axis = tmp_cat[0]
            group = {
                "title": tmp_cat[1],
                "cats": {}
            }
            # Indices already claimed by an earlier label of this category.
            used_indices = set()
            for g in "--".join(tmp_cat[2:]).split("--new_label--"):
                print(g + "\n")
                elem = g.split("--")
                index_list = get_indices(elem[1])
                index_names = []
                for i in index_list:
                    # The same index may not appear under two labels.
                    if i in used_indices:
                        sys.exit(4)
                    index_names.append(str(names[axis][i]))
                used_indices.update(index_list)
                print(index_names, elem[0], sep="\t")
                group["cats"][elem[0]] = index_names
            categories[axis].append(group)
        print(categories)

    prepare_heatmap(args[1], args[2], args[3], args[4], categories, args[5], args[6])