comparison paracords_plot.py @ 2:9958188c6195 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
author bgruening
date Mon, 04 Nov 2019 12:20:51 -0500
parents 7b21a9b5922f
children
comparison
equal deleted inserted replaced
1:7b21a9b5922f 2:9958188c6195
1 import sys 1 import sys
2 import argparse 2 import argparse
3 import plotly 3 import plotly
4 import plotly.graph_objs as go 4 import plotly.graph_objs as go
5 import pandas as pd 5 import pandas as pd
6 import re
6 7
7 def main(infile, col_dimensions, categorized, col_color): 8
9 def main(infile, col_dimensions, categorized, col_color,
10 dimension_mode='by_index'):
8 """ 11 """
9 Produce an interactive paracords plotting html 12 Produce an interactive paracords plotting html
10 Args: 13 Args:
11 infile: str, tabular file 14 infile: str, tabular file
12 col_dimensions: str, comma separated index numbers. For example: "3,4,5" 15 col_dimensions: str, comma separated index numbers. For example: "3,4,5"
13 col_color: str, index number 16 col_color: str, index number
17 dimension_mode: str, one ['by_index', 'by_name']
14 """ 18 """
15 df = pd.read_csv(infile, sep='\t', parse_dates=True) 19 df = pd.read_csv(infile, sep='\t', parse_dates=True)
16 20
17 dimensions = [] 21 dimensions = []
18 col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] 22
19 for col in col_dimensions: 23 if dimension_mode not in ['by_index', 'by_name']:
20 values = df[df.columns[col]] 24 raise ValueError("Select dimensions `{}` is not supported!"\
25 .format(dimension_mode))
26 if dimension_mode == 'by_index':
27 col_dimensions = [int(x)-1 for x in col_dimensions.split(',')]
28 col_dimensions = df.columns[col_dimensions]
29 else:
30 if '*' not in col_dimensions:
31 col_dimensions = [header.strip() for header in col_dimensions.split(',')]
32 else:
33 pattern = col_dimensions.strip()
34 col_dimensions = [header for header in df.columns
35 if re.search(pattern, header)]
36
37 for col_name in col_dimensions:
38 values = df[col_name]
21 if categorized == 'boolfalse' and all(type(e) is int for e in values ): 39 if categorized == 'boolfalse' and all(type(e) is int for e in values ):
22 dimensions.append( 40 dimensions.append(
23 dict( values = values, 41 dict( values = values,
24 tickformat = ",.2r", 42 tickformat = ",.2r",
25 label = df.columns[col]) 43 label = col_name)
26 ) 44 )
27 elif categorized == 'boolfalse' and all(type(e) is float for e in values ): 45 elif categorized == 'boolfalse' and all(type(e) is float for e in values ):
28 dimensions.append( 46 dimensions.append(
29 dict( values = values, 47 dict( values = values,
30 tickformat = "g", 48 tickformat = "g",
31 label = df.columns[col]) 49 label = col_name)
32 ) 50 )
33 else: 51 else:
34 unique_values = list(set(values)) 52 unique_values = list(set(values))
35 unique_values.sort() 53 unique_values.sort()
54 # cast to str, fix object indexing
55 unique_values = [repr(e) for e in unique_values]
36 dimensions.append( 56 dimensions.append(
37 dict( range = [0, len(unique_values)-1], 57 dict( range = [0, len(unique_values)-1],
38 tickvals = list(range(len(unique_values))), 58 tickvals = list(range(len(unique_values))),
39 ticktext = [str(e) for e in unique_values], 59 ticktext = unique_values,
40 values = list(map(lambda e: unique_values.index(e), values )), 60 values = list(map(lambda e: unique_values.index(repr(e)), values )),
41 label = df.columns[col]) 61 label = col_name)
42 ) 62 )
43 63
44 col_color = int(col_color) - 1 64 col_color = int(col_color) - 1
45 colors = df[df.columns[col_color]] 65 colors = df[df.columns[col_color]]
46 if all(type(e) is int for e in colors ): 66 if all(type(e) is int for e in colors ):
78 aparser = argparse.ArgumentParser() 98 aparser = argparse.ArgumentParser()
79 aparser.add_argument( "-i", "--input", dest="infile", required=True) 99 aparser.add_argument( "-i", "--input", dest="infile", required=True)
80 aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") 100 aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions")
81 aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") 101 aparser.add_argument( "-t", "--categorized_datatype", dest="categorized")
82 aparser.add_argument( "-c", "--col_color", dest="col_color") 102 aparser.add_argument( "-c", "--col_color", dest="col_color")
103 aparser.add_argument( "-m", "--dimension_mode", dest="dimension_mode")
83 args = aparser.parse_args() 104 args = aparser.parse_args()
84 105
85 main(args.infile, args.col_dimensions, args.categorized, args.col_color) 106 main(args.infile, args.col_dimensions, args.categorized, args.col_color,
107 args.dimension_mode)