0
+ − 1 <tool name="plotlyblast" id="plotlyblast" version="3.0">
1
+ − 2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
+ − 3 <!--Created by toolfactory@galaxy.org at 26/07/2023 15:45:12 using the Galaxy Tool Factory.-->
0
+ − 4 <description>Plotly plot generator</description>
+ − 5 <requirements>
1
+ − 6 <requirement type="package">pandas</requirement>
+ − 7 <requirement type="package">plotly</requirement>
0
+ − 8 </requirements>
+ − 9 <stdio>
+ − 10 <exit_code range="1:" level="fatal"/>
+ − 11 </stdio>
+ − 12 <version_command><![CDATA[echo "3.0"]]></version_command>
<!-- Runs the generated "runme" script with python. File paths are single-quoted
     so that a path containing spaces or shell metacharacters is passed as one
     argument; the text parameters keep their original double quotes. -->
<command><![CDATA[python
'$runme'
--input_tab '$input_tab'
--htmlout '$htmlout'
--xcol "$xcol"
--ycol "$ycol"
--colourcol "$colourcol"
--hovercol "$hovercol"
--title "$title"
--header "$header"]]></command>
+ − 31 <configfiles>
+ − 32 <configfile name="runme"><![CDATA[#raw
+ − 33
+ − 34 import argparse
+ − 35 import sys
+ − 36 import math
+ − 37 import plotly.express as px
+ − 38 import pandas as pd
# Ross Lazarus July 2023
# based on various plotly tutorials
# Reads the tab separated file named by --input_tab (a blastn search result is
# the expected input), draws a plotly.express scatter plot of --xcol against
# --ycol and writes the figure to --htmlout with fig.write_html.
parser = argparse.ArgumentParser()
a = parser.add_argument
a('--input_tab',default='')
a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
a('--htmlout',default="test_run.html")
a('--xcol',default='gaps')
a('--ycol',default='qseq')
a('--colourcol',default='qaccver')
a('--hovercol',default='qseq')
a('--title',default='test blast plot of mismatch by bitscore coloured by qaccver ')
args = parser.parse_args()
# NOTE(review): read_csv defaults to header=0, so the first line of the file is
# consumed as a header row before the columns are renamed below. If the input
# has no header line, that first record is lost - confirm against real inputs
# and pass header=None if so.
df = pd.read_csv(args.input_tab, sep='\t')
NCOLS = df.columns.size
MAXLEN = 30  # longest x/y value left intact; longer ones are cut and suffixed with '..'
defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
if len(args.header.strip()) > 0:
    # Strip each name so "a, b" yields 'b' rather than ' b', which could never
    # match the xcol/ycol/colourcol/hovercol values supplied by the user.
    newcols = [name.strip() for name in args.header.split(',')]
    if len(newcols) == NCOLS:
        df.columns = newcols
    else:
        # Wrong number of names: warn on stderr and fall back to col1..coln.
        sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
        df.columns = defaultcols
else:
    df.columns = defaultcols
# Only transform when an 'evalue' column actually exists: after the col1..coln
# fallback (or with a non-blast header) there is none, and indexing it
# unconditionally raised KeyError.
if 'evalue' in df.columns:
    # -ln(evalue) so large values reflect statistical surprise; the 1e-308
    # offset keeps math.log defined when evalue is exactly 0.
    df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']]
if len(args.colourcol.strip()) == 0:
    fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
else:
    fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
if args.title:
    ftitle = dict(text=args.title, font=dict(size=40), automargin=True)
    fig.update_layout(title=ftitle)
for scatter in fig.data:
    # Truncate long values in each trace so axis tickmarks stay readable.
    scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
    scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
    # NOTE(review): this only shortens legendgroup when NO colour column was
    # given; the condition looks inverted, but it is kept as-is - confirm intent.
    if len(args.colourcol.strip()) == 0:
        sl = str(scatter['legendgroup'])
        if len(sl) > MAXLEN:
            scatter['legendgroup'] = sl[:MAXLEN]
fig.write_html(args.htmlout)
+ − 82
1
+ − 83
0
+ − 84 #end raw]]></configfile>
+ − 85 </configfiles>
+ − 86 <inputs>
+ − 87 <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
1
+ − 88 <param name="xcol" type="text" value="qaccver" label="x axis for plot" help=""/>
0
+ − 89 <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
1
+ − 90 <param name="colourcol" type="text" value="piden" label="Column containing a groupable variable for colour. Leave empty for no colouring." help="Adds a legend so choose wisely"/>
0
+ − 91 <param name="hovercol" type="text" value="qseq" label="Column name for hover string" help=""/>
1
+ − 92 <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/>
+ − 93 <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Comma delimited list of column header names for this tabular file. Leave empty to use col1...coln." help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/>
0
+ − 94 </inputs>
+ − 95 <outputs>
+ − 96 <data name="htmlout" format="html" label="htmlout" hidden="false"/>
+ − 97 </outputs>
+ − 98 <tests>
+ − 99 <test>
+ − 100 <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
+ − 101 <param name="input_tab" value="input_tab_sample"/>
1
+ − 102 <param name="xcol" value="qaccver"/>
0
+ − 103 <param name="ycol" value="bitscore"/>
1
+ − 104 <param name="colourcol" value="piden"/>
0
+ − 105 <param name="hovercol" value="qseq"/>
1
+ − 106 <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/>
0
+ − 107 <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
+ − 108 </test>
+ − 109 </tests>
+ − 110 <help><![CDATA[
+ − 111
+ − 112 This is a specialised version of a generic tabular file plotter tool plotlytabular
+ − 113
+ − 114
+ − 115
+ − 116 .. class:: warningmark
+ − 117
1
+ − 118 NOTE: Strings longer than 30 characters in x and y tickmarks WILL BE TRUNCATED - ".." is added to indicate truncation - otherwise some plots are squished.
+ − 119
0
+ − 120
+ − 121
1
+ − 122 .. class:: warningmark
0
+ − 123
1
+ − 124 NOTE: Blast evalues WILL BE TRANSFORMED using -log(evalue) (natural logarithm), so they are scaled in a way that doesn't confuse plotly.express with the tiny values.
0
+ − 125
+ − 126
+ − 127
+ − 128 ----
+ − 129
+ − 130
+ − 131
1
+ − 132 The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information.
0
+ − 133
+ − 134
+ − 135
+ − 136 Assumes you want a hover display for an interactive plot to be informative
+ − 137
+ − 138
+ − 139
+ − 140 Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.
+ − 141
+ − 142
+ − 143
+ − 144 For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.
+ − 145
+ − 146 qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles
+ − 147
+ − 148
+ − 149
+ − 150 When a header is supplied, the xcol and other column names must match one of those supplied column names.
+ − 151
+ − 152 So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.
+ − 153
+ − 154
+ − 155
+ − 156 Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
+ − 157
+ − 158
+ − 159
1
+ − 160 .. image:: pbsample.png
+ − 161
+ − 162 :height: 400
+ − 163
+ − 164 :width: 400
+ − 165
+ − 166
+ − 167
0
+ − 168 ------
+ − 169
+ − 170
+ − 171 Script::
+ − 172
+ − 173 import argparse
+ − 174 import sys
+ − 175 import math
+ − 176 import plotly.express as px
+ − 177 import pandas as pd
+ − 178 # Ross Lazarus July 2023
+ − 179 # based on various plotly tutorials
+ − 180 # Expects a blastn search result passed in as the first command line parameter.
+ − 181 parser = argparse.ArgumentParser()
+ − 182 a = parser.add_argument
+ − 183 a('--input_tab',default='')
+ − 184 a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
+ − 185 a('--htmlout',default="test_run.html")
+ − 186 a('--xcol',default='gaps')
+ − 187 a('--ycol',default='qseq')
+ − 188 a('--colourcol',default='qaccver')
+ − 189 a('--hovercol',default='qseq')
+ − 190 a('--title',default='test blast plot of mismatch by bitscore coloured by qaccver ')
+ − 191 args = parser.parse_args()
+ − 192 df = pd.read_csv(args.input_tab, sep='\t')
+ − 193 NCOLS = df.columns.size
1
+ − 194 MAXLEN = 30 # tricky way to truncate long axis tickmarks
0
+ − 195 defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
+ − 196 if len(args.header.strip()) > 0:
+ − 197 newcols = args.header.split(',')
+ − 198 if len(newcols) == NCOLS:
+ − 199 df.columns = newcols
+ − 200 else:
+ − 201 sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
+ − 202 df.columns = defaultcols
+ − 203 else:
+ − 204 df.columns = defaultcols
1
+ − 205 df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise
0
+ − 206 if len(args.colourcol.strip()) == 0:
+ − 207 fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
+ − 208 else:
+ − 209 fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
+ − 210 if args.title:
1
+ − 211 ftitle=dict(text=args.title, font=dict(size=40), automargin=True)
0
+ − 212 fig.update_layout(title=ftitle)
+ − 213 for scatter in fig.data:
+ − 214 scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
+ − 215 scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
1
+ − 216 if len(args.colourcol.strip()) == 0:
+ − 217 sl = str(scatter['legendgroup'])
+ − 218 if len(sl) > MAXLEN:
+ − 219 scatter['legendgroup'] = sl[:MAXLEN]
0
+ − 220 fig.write_html(args.htmlout)
+ − 221
+ − 222 ]]></help>
+ − 223 <citations>
+ − 224 <citation type="doi">10.1093/bioinformatics/bts573</citation>
+ − 225 </citations>
+ − 226 </tool>
+ − 227