Mercurial > repos > bgruening > plotly_parallel_coordinates_plot
view paracords_plot.py @ 2:9958188c6195 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
author | bgruening |
---|---|
date | Mon, 04 Nov 2019 12:20:51 -0500 |
parents | 7b21a9b5922f |
children |
line wrap: on
line source
import sys
import argparse
import plotly
import plotly.graph_objs as go
import pandas as pd
import re


def _numeric_tickformat(values):
    """Return the tick format for an all-int or all-float column, else None."""
    # Exact type match (not isinstance): bool entries must not count as ints,
    # so boolean columns keep falling through to the categorical handling.
    if all(type(e) is int for e in values):
        return ",.2r"
    if all(type(e) is float for e in values):
        return "g"
    return None


def _select_dimension_columns(df, col_dimensions, dimension_mode):
    """Resolve the user's column selection into column labels of `df`."""
    if dimension_mode not in ('by_index', 'by_name'):
        raise ValueError("Select dimensions `{}` is not supported!"
                         .format(dimension_mode))

    if dimension_mode == 'by_index':
        # Indices arrive 1-based; convert to 0-based positions.
        positions = [int(x) - 1 for x in col_dimensions.split(',')]
        return df.columns[positions]

    # by_name: a '*' anywhere switches from a comma separated list of header
    # names to a single regular expression searched against every header.
    if '*' not in col_dimensions:
        return [header.strip() for header in col_dimensions.split(',')]
    pattern = re.compile(col_dimensions.strip())
    return [header for header in df.columns if pattern.search(header)]


def _categorical_dimension(values, label):
    """Build a Parcoords dimension that maps each distinct value to a tick."""
    unique_values = list(set(values))
    unique_values.sort()
    # cast to str, fix object indexing
    tick_labels = [repr(e) for e in unique_values]

    # One dict lookup per row instead of a linear `list.index` scan.
    # `setdefault` keeps the first position when two values share a repr,
    # which is what `list.index` would have returned.
    tick_of = {}
    for position, text in enumerate(tick_labels):
        tick_of.setdefault(text, position)

    return dict(
        range=[0, len(tick_labels) - 1],
        tickvals=list(range(len(tick_labels))),
        ticktext=tick_labels,
        values=[tick_of[repr(e)] for e in values],
        label=label,
    )


def main(infile, col_dimensions, categorized, col_color,
         dimension_mode='by_index'):
    """
    Produce an interactive parallel coordinates plot as `output.html`

    Args:
        infile: str, path to a tab separated file with a header row
        col_dimensions: str, the columns to draw as axes.
            With dimension_mode 'by_index': comma separated 1-based
            index numbers. For example: "3,4,5".
            With dimension_mode 'by_name': comma separated header names,
            or, if the string contains '*', a regular expression that is
            searched against every header.
        categorized: str, when equal to 'boolfalse', columns whose entries
            are all int or all float are drawn as numeric axes. With any
            other value every selected column is drawn as a categorical
            axis.
        col_color: str, 1-based index number of the column used to color
            the lines. It is also appended as the last axis.
        dimension_mode: str, one of ['by_index', 'by_name']

    Raises:
        ValueError: if `dimension_mode` is not a supported mode.
        SystemExit: if the color column is neither all int nor all float.
    """
    df = pd.read_csv(infile, sep='\t', parse_dates=True)

    dimensions = []
    for col_name in _select_dimension_columns(df, col_dimensions,
                                              dimension_mode):
        values = df[col_name]
        tickformat = None
        if categorized == 'boolfalse':
            tickformat = _numeric_tickformat(values)

        if tickformat is not None:
            dimensions.append(
                dict(values=values, tickformat=tickformat, label=col_name))
        else:
            dimensions.append(_categorical_dimension(values, col_name))

    col_color = int(col_color) - 1
    color_label = df.columns[col_color]
    colors = df[color_label]
    tickformat = _numeric_tickformat(colors)
    if tickformat is None:
        sys.exit("Error: the column for coloring must contain all "
                 "numerical values!")

    dimensions.append(
        dict(values=colors, tickformat=tickformat, label=color_label))

    line = dict(
        color=colors,
        colorscale='Jet',
        showscale=True,
        reversescale=True,
    )

    data = [go.Parcoords(line=line, dimensions=dimensions)]

    plotly.offline.plot(data, filename="output.html", auto_open=False)


if __name__ == "__main__":
    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--input", dest="infile", required=True)
    # main() cannot run without a column selection or a color column, so
    # let argparse report a missing option instead of failing later.
    aparser.add_argument("-d", "--col_dimensions", dest="col_dimensions",
                         required=True)
    aparser.add_argument("-t", "--categorized_datatype", dest="categorized")
    aparser.add_argument("-c", "--col_color", dest="col_color",
                         required=True)
    # Default mirrors main(); without it an omitted -m would pass None and
    # be rejected as an unsupported mode.
    aparser.add_argument("-m", "--dimension_mode", dest="dimension_mode",
                         default="by_index")
    args = aparser.parse_args()

    main(args.infile, args.col_dimensions, args.categorized,
         args.col_color, args.dimension_mode)