view plotly_tabular_tool/plotlytabular.xml @ 2:08cc7a481af8 draft

Uploaded
author fubar
date Fri, 28 Jul 2023 06:01:53 +0000
parents 449c918bc4cd
children 51a0c2e0fbdf
line wrap: on
line source

<tool name="plotlytabular" id="plotlytabular" version="3.0">
  <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
  <!--Created by toolfactory@galaxy.org at 28/07/2023 15:52:28 using the Galaxy Tool Factory.-->
  <description>Plotly plot generator for any small-ish Galaxy tabular data.</description>
  <requirements>
    <requirement version="1.5.3" type="package">pandas</requirement>
    <requirement version="5.9.0" type="package">plotly</requirement>
  </requirements>
  <stdio>
    <exit_code range="1:" level="fatal"/>
  </stdio>
  <version_command><![CDATA[echo "3.0"]]></version_command>
  <command><![CDATA[python
$runme
--input_tab
$input_tab
--htmlout
$htmlout
--xcol
"$xcol"
--ycol
"$ycol"
--colourcol
"$colourcol"
--hovercol
"$hovercol"
--title
"$title"
--header
"$header"]]></command>
  <configfiles>
    <configfile name="runme"><![CDATA[#raw

import argparse
import sys
import math
import plotly.express as px
import pandas as pd
# Ross Lazarus July 2023
# based on various plotly tutorials
MAXLEN = 35  # tick labels longer than this are cut and marked with '..'


def parse_args(argv=None):
    """Parse the tool's command line.

    argv: list of argument strings; None makes argparse read sys.argv[1:].
    Returns the argparse namespace with input_tab, header, htmlout, xcol,
    ycol, colourcol, hovercol and title (all strings).
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab', default='')
    a('--header', default='')
    a('--htmlout', default="test_run.html")
    a('--xcol', default='')
    a('--ycol', default='')
    a('--colourcol', default='')
    a('--hovercol', default='')
    a('--title', default='Default plot title')
    return parser.parse_args(argv)


def choose_columns(current, header, xcol, ycol):
    """Decide which column names the data frame should carry.

    current: column names pandas derived from the first line of the file.
    header: user supplied comma delimited names, or '' for none.
    xcol, ycol: axis column names requested by the user.

    Returns (names, warning). warning is None, or a newline terminated
    message for stderr explaining why col1..colN were substituted.
    """
    ncols = len(current)
    defaultcols = ['col%d' % (i + 1) for i in range(ncols)]
    if header.strip():
        # Strip each name so "a, b" gives "b" rather than " b", which could
        # never match the xcol/ycol the user typed.
        newcols = [name.strip() for name in header.split(',')]
        if len(newcols) == ncols:
            return newcols, None
        return defaultcols, (
            '#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n'
            % (header, len(newcols), ncols))
    # No header supplied: keep the file's own header only if it contains
    # both axis names, otherwise assume the file has no real header.
    if xcol in current and ycol in current:
        return list(current), None
    return defaultcols, (
        'replacing first row of data derived header %s with %s\n'
        % (current, defaultcols))


def missing_columns(names, wanted):
    """Return the entries of wanted that are not in names, in order."""
    return [name for name in wanted if name not in names]


def truncate_label(value, maxlen=MAXLEN):
    """Return value unchanged unless its string form exceeds maxlen.

    Over-long values are replaced by their first maxlen characters
    followed by '..' so long tick labels do not squash the plot.
    """
    text = str(value)
    if len(text) > maxlen:
        return text[:maxlen] + '..'
    return value


def main(argv=None):
    """Read the tabular input, draw the scatter plot and write the HTML."""
    args = parse_args(argv)
    use_colour = len(args.colourcol.strip()) > 0
    use_hover = len(args.hovercol.strip()) > 0
    # NOTE(review): read_csv takes the first line as a header, so when
    # col1..colN or a supplied header replace it, the first row of a
    # headerless file is not plotted - confirm whether a header=None
    # re-read is wanted in that case.
    df = pd.read_csv(args.input_tab, sep='\t')
    names, warning = choose_columns(df.columns, args.header, args.xcol, args.ycol)
    if warning:
        sys.stderr.write(warning)
    df.columns = names
    # Fail with a readable message rather than a plotly traceback when a
    # requested column does not exist; the exit status is still non-zero.
    wanted = [args.xcol, args.ycol]
    scatter_kwargs = dict(x=args.xcol, y=args.ycol)
    if use_colour:
        wanted.append(args.colourcol)
        scatter_kwargs['color'] = args.colourcol
    if use_hover:
        wanted.append(args.hovercol)
        scatter_kwargs['hover_name'] = args.hovercol
    absent = missing_columns(list(df.columns), wanted)
    if absent:
        sys.exit('#### Requested column name(s) %s not found in the available columns %s'
                 % (absent, list(df.columns)))
    fig = px.scatter(df, **scatter_kwargs)
    if args.title:
        fig.update_layout(title=dict(text=args.title, font=dict(size=50)))
    for scatter in fig.data:
        scatter['x'] = [truncate_label(value) for value in scatter['x']]
        scatter['y'] = [truncate_label(value) for value in scatter['y']]
        # NOTE(review): this only runs when no colour column is given, when
        # the legend group is presumably empty - confirm the condition is
        # not inverted before relying on it.
        if not use_colour:
            sl = str(scatter['legendgroup'])
            if len(sl) > MAXLEN:
                scatter['legendgroup'] = sl[:MAXLEN]
    fig.write_html(args.htmlout)


if __name__ == '__main__':
    main()


#end raw]]></configfile>
  </configfiles>
  <inputs>
    <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
    <param name="xcol" type="text" value="sepal_length" label="x axis for plot" help="Use a column name from the header if the file has one, or use one from the list supplied below, or use col1....colN otherwise to select the correct column"/>
    <param name="ycol" type="text" value="sepal_width" label="y axis for plot" help="Use a column name from the header if the file has one, or use one from the list supplied below, or use col1....colN otherwise to select the correct column"/>
    <param name="colourcol" type="text" value="petal_width" label="column containing a groupable variable for colour. Default none." help="Adds a legend so choose wisely "/>
    <param name="hovercol" type="text" value="species_id" label="column name for hover string" help="Use a column name from the header if the file has one, or use one from the list supplied below, or use col1....colN otherwise to select the correct column"/>
    <param name="title" type="text" value="Iris data" label="Title for the plot" help="Special characters will probably be escaped so do not use them"/>
    <param name="header" type="text" value="" label="Use this comma delimited list of column header names for this tabular file. Default is None when col1...coln will be used" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/>
  </inputs>
  <outputs>
    <data name="htmlout" format="html" label="htmlout" hidden="false"/>
  </outputs>
  <tests>
    <test>
      <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
      <param name="input_tab" value="input_tab_sample"/>
      <param name="xcol" value="sepal_length"/>
      <param name="ycol" value="sepal_width"/>
      <param name="colourcol" value="petal_width"/>
      <param name="hovercol" value="species_id"/>
      <param name="title" value="Iris data"/>
      <param name="header" value=""/>
    </test>
  </tests>
  <help><![CDATA[

This is a generic version of plotlyblast, the plotter specific to Galaxy blastn search output files.



.. class:: warningmark

NOTE: Strings longer than 35 characters in x and y tickmarks WILL BE TRUNCATED - ".." is added to indicate truncation - otherwise some plots are squished.



.. class:: warningmark

NOTE: Columns with very small scientific notation floats will need to be pre-scaled in a way that doesn't confuse plotly.express with their values.



----



The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information. 



This assumes you want an informative hover display on an interactive plot.



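If no header parameter is supplied and the first row of the file contains both the x and y column names, that row is used as the header. Otherwise column names are auto-generated as col1,...coln, unless a comma separated list with exactly one name per column is supplied as the header parameter.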



For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.

   qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles



When a header is supplied, the xcol and other column names must match one of those supplied column names.

So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.



Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt



 

------


Script::

    import argparse
    import sys
    import math
    import plotly.express as px
    import pandas as pd
    # Ross Lazarus July 2023
    # based on various plotly tutorials
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab',default='')
    a('--header',default='')
    a('--htmlout',default="test_run.html")
    a('--xcol',default='')
    a('--ycol',default='')
    a('--colourcol',default='')
    a('--hovercol',default='')
    a('--title',default='Default plot title')
    args = parser.parse_args()
    isColour = False
    isHover = False
    if len(args.colourcol.strip()) > 0:
        isColour = True
    if len(args.hovercol.strip()) > 0:
        isHover = True
    df = pd.read_csv(args.input_tab, sep='\t')
    MAXLEN=35
    NCOLS = df.columns.size
    defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
    testcols = df.columns
    if len(args.header.strip()) > 0:
        newcols = args.header.split(',')
        if len(newcols) == NCOLS:
            df.columns = newcols
        else:
            sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
            df.columns = defaultcols
    else: # no header supplied - check for a real one that matches the x and y axis column names
        colsok = (args.xcol in testcols) and (args.ycol in testcols) # if they match, probably ok...should use more code and logic..
        if not colsok:
            sys.stderr.write('replacing first row of data derived header %s with %s' % (testcols, defaultcols))
            df.columns = defaultcols
    #df['col11'] = [-math.log(x) for x in df['col11']] # convert so large values reflect statistical surprise
    if isHover and isColour:
        fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
    elif isHover:
        fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
    elif isColour:
        fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol)
    else:
        fig = px.scatter(df, x=args.xcol, y=args.ycol)
    if args.title:
        ftitle=dict(text=args.title, font=dict(size=50))
        fig.update_layout(title=ftitle)
    for scatter in fig.data:
        scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
        scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
        if len(args.colourcol.strip()) == 0:
            sl = str(scatter['legendgroup'])
            if len(sl) > MAXLEN:
                scatter['legendgroup'] = sl[:MAXLEN]
    fig.write_html(args.htmlout)

]]></help>
  <citations>
    <citation type="doi">10.1093/bioinformatics/bts573</citation>
  </citations>
</tool>