diff plotly_blast_tool/plotlyblast.xml @ 7:bb99f2c0e358 draft

Updated by regenerating with latest ToolFactory revision
author fubar
date Fri, 04 Aug 2023 01:52:13 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plotly_blast_tool/plotlyblast.xml	Fri Aug 04 01:52:13 2023 +0000
@@ -0,0 +1,279 @@
+<tool name="plotlyblast" id="plotlyblast" version="3.0">
+  <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
+  <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:02 using the Galaxy Tool Factory.-->
+  <description>Plotly plot generator specialised for 25 column Galaxy blast search result tabular files</description>
+  <requirements>
+    <requirement version="1.5.3" type="package">pandas</requirement>
+    <requirement version="5.9.0" type="package">plotly</requirement>
+    <requirement version="0.2.1" type="package">python-kaleido</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" level="fatal"/>
+  </stdio>
+  <version_command><![CDATA[echo "3.0"]]></version_command>
+  <command><![CDATA[python
+'$runme'
+--input_tab
+'$input_tab'
+--htmlout
+'$htmlout'
+--xcol
+"$xcol"
+--ycol
+"$ycol"
+--colourcol
+"$colourcol"
+--hovercol
+"$hovercol"
+--title
+"$title"
+--header
+"$header"
+--image_type
+"$outputimagetype"]]></command>
+  <configfiles>
+    <configfile name="runme"><![CDATA[#raw
+
+import argparse
+import shutil
+import sys
+import math
+import plotly.express as px
+import pandas as pd
+# Ross Lazarus July 2023
+# based on various plotly tutorials
+# Expects a blastn search result passed in as the first command line parameter.
+parser = argparse.ArgumentParser()
+a = parser.add_argument
+a('--input_tab',default='')
+a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
+a('--htmlout',default="test_run.html")
+a('--xcol',default='')
+a('--ycol',default='')
+a('--colourcol',default='')
+a('--hovercol',default='')
+a('--title',default='test blast plot')
+a('--image_type',default='short_html')
+args = parser.parse_args()
+df = pd.read_csv(args.input_tab, sep='\t')
+NCOLS = df.columns.size
+MAXLEN = 30 # tricky way to truncate long axis tickmarks
+defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
+if len(args.header.strip()) > 0:
+    newcols = args.header.split(',')
+    if len(newcols) == NCOLS:
+        df.columns = newcols
+    else:
+        sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
+        df.columns = defaultcols
+else:
+    df.columns = defaultcols
+df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise
+if len(args.colourcol.strip()) == 0:
+    fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
+else:
+    fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
+if args.title:
+    ftitle=dict(text=args.title, font=dict(size=40))
+    fig.update_layout(title=ftitle)
+for scatter in fig.data:
+    scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
+    scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
+    if len(args.colourcol.strip()) == 0:
+        sl = str(scatter['legendgroup'])
+        if len(sl) > MAXLEN:
+            scatter['legendgroup'] = sl[:MAXLEN]
+if args.image_type == "short_html":
+    fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
+elif args.image_type == "long_html":
+    fig.write_html(args.htmlout)
+elif args.image_type == "small_png":
+    ht = 768
+    wdth = 1024
+    fig.write_image('plotly.png', height=ht, width=wdth)
+    shutil.copyfile('plotly.png', args.htmlout)
+else:
+    ht = 1200
+    wdth = 1920
+    fig.write_image('plotly.png', height=ht, width=wdth)
+    shutil.copyfile('plotly.png', args.htmlout)
+
+
+
+#end raw]]></configfile>
+  </configfiles>
+  <inputs>
+    <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
+    <param name="xcol" type="text" value="qaccver" label="x axis for plot" help=""/>
+    <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
+    <param name="colourcol" type="text" value="piden" label="Column containing a groupable variable for colour. Leave empty for no colouring." help="Adds a legend so choose wisely"/>
+    <param name="hovercol" type="text" value="qseq" label="Column name for hover string" help=""/>
+    <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/>
+    <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file. Default is None when col1...coln will be used" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/>
+    <param name="outputimagetype" type="select" label="Select the output format for this plot image" help="Small and large png are not interactive but best for many (more than 10k) points. Stand-alone HTML includes 3MB of javascript. Short form HTML gets it the usual way so can be cut and pasted into documents.">
+      <option value="short_html">Short HTML interactive - requires network connection to download 3MB javascript</option>
+      <option value="long_html">Long HTML for stand-alone viewing where network access to libraries is not available.</option>
+      <option value="large_png">Large (1920x1200) png image - not interactive so hover column ignored</option>
+      <option value="small_png">Small (1024x768) png image - not interactive so hover column ignored</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="htmlout" format="html" label="Plotlyblast $title on $input_tab.element_identifier" hidden="false">
+      <change_format>
+        <when input="outputimagetype" format="png" value="small_png"/>
+        <when input="outputimagetype" format="png" value="large_png"/>
+      </change_format>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
+      <param name="input_tab" value="input_tab_sample"/>
+      <param name="xcol" value="qaccver"/>
+      <param name="ycol" value="bitscore"/>
+      <param name="colourcol" value="piden"/>
+      <param name="hovercol" value="qseq"/>
+      <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/>
+      <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
+      <param name="outputimagetype" value="short_html"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+
+This is a specialised version of a generic tabular file plotter tool plotlytabular
+
+PNG image outputs are not interactive but best for very large numbers of data points. Hover column will be ignored.
+
+HTML interactive plots are best for a few thousand data points at most because the hover information becomes uncontrollable with very dense points.
+
+Using the shorter format HTML relies on internet access when viewed, and saves 3MB of javascript being embedded.
+
+The long format is useful if potentially viewed offline.
+
+
+
+.. class:: warningmark
+
+NOTE: Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished.
+
+
+
+.. class:: warningmark
+
+NOTE: Blast evalues WILL BE TRANSFORMED using -log10(evalue), so they are scaled in a way that doesn't confuse plotly.express with the tiny values.
+
+
+
+----
+
+
+
+The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information. 
+
+
+
+Assumes you want a hover display for an interactive plot to be informative
+
+
+
+Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.
+
+
+
+For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.
+
+   qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles
+
+
+
+When a header is supplied, the xcol and other column names must match one of those supplied column names.
+
+So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.
+
+
+
+Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
+
+
+
+
+
+ 
+
+------
+
+
+Script::
+
+    import argparse
+    import shutil
+    import sys
+    import math
+    import plotly.express as px
+    import pandas as pd
+    # Ross Lazarus July 2023
+    # based on various plotly tutorials
+    # Expects a blastn search result passed in as the first command line parameter.
+    parser = argparse.ArgumentParser()
+    a = parser.add_argument
+    a('--input_tab',default='')
+    a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
+    a('--htmlout',default="test_run.html")
+    a('--xcol',default='')
+    a('--ycol',default='')
+    a('--colourcol',default='')
+    a('--hovercol',default='')
+    a('--title',default='test blast plot')
+    a('--image_type',default='short_html')
+    args = parser.parse_args()
+    df = pd.read_csv(args.input_tab, sep='\t')
+    NCOLS = df.columns.size
+    MAXLEN = 30 # tricky way to truncate long axis tickmarks
+    defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
+    if len(args.header.strip()) > 0:
+        newcols = args.header.split(',')
+        if len(newcols) == NCOLS:
+            df.columns = newcols
+        else:
+            sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
+            df.columns = defaultcols
+    else:
+        df.columns = defaultcols
+    df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise
+    if len(args.colourcol.strip()) == 0:
+        fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
+    else:
+        fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
+    if args.title:
+        ftitle=dict(text=args.title, font=dict(size=40))
+        fig.update_layout(title=ftitle)
+    for scatter in fig.data:
+        scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
+        scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
+        if len(args.colourcol.strip()) == 0:
+            sl = str(scatter['legendgroup'])
+            if len(sl) > MAXLEN:
+                scatter['legendgroup'] = sl[:MAXLEN]
+    if args.image_type == "short_html":
+        fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
+    elif args.image_type == "long_html":
+        fig.write_html(args.htmlout)
+    elif args.image_type == "small_png":
+        ht = 768
+        wdth = 1024
+        fig.write_image('plotly.png', height=ht, width=wdth)
+        shutil.copyfile('plotly.png', args.htmlout)
+    else:
+        ht = 1200
+        wdth = 1920
+        fig.write_image('plotly.png', height=ht, width=wdth)
+        shutil.copyfile('plotly.png', args.htmlout)
+
+]]></help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/bts573</citation>
+  </citations>
+</tool>
+