comparison plotly_blast_tool/plotlyblast.xml @ 7:bb99f2c0e358 draft

Updated by regenerating with latest ToolFactory revision
author fubar
date Fri, 04 Aug 2023 01:52:13 +0000
parents
children
comparison
equal deleted inserted replaced
6:c23d0b047de9 7:bb99f2c0e358
1 <tool name="plotlyblast" id="plotlyblast" version="3.0">
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
3 <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:02 using the Galaxy Tool Factory.-->
4 <description>Plotly plot generator specialised for 25 column Galaxy blast search result tabular files</description>
<requirements>
  <!-- Pinned packages the embedded plotting script imports or relies on. -->
  <requirement type="package" version="1.5.3">pandas</requirement>
  <requirement type="package" version="5.9.0">plotly</requirement>
  <requirement type="package" version="0.2.1">python-kaleido</requirement>
</requirements>
<stdio>
  <!-- Any exit code of 1 or greater marks the job as failed. -->
  <exit_code level="fatal" range="1:"/>
</stdio>
<version_command><![CDATA[echo "3.0"]]></version_command>
<!-- Runs the generated "runme" configfile script. Every substituted value is
     single-quoted so that paths or text containing spaces or shell
     metacharacters reach the script as exactly one argument each. -->
<command><![CDATA[python '$runme'
--input_tab '$input_tab'
--htmlout '$htmlout'
--xcol '$xcol'
--ycol '$ycol'
--colourcol '$colourcol'
--hovercol '$hovercol'
--title '$title'
--header '$header'
--image_type '$outputimagetype']]></command>
34 <configfiles>
35 <configfile name="runme"><![CDATA[#raw
36
37 import argparse
38 import shutil
39 import sys
40 import math
41 import plotly.express as px
42 import pandas as pd
# Ross Lazarus July 2023
# based on various plotly tutorials
# Plots two columns of a headerless tabular blast search result as a plotly
# scatter plot, written either as interactive HTML or as a static PNG image.

MAXLEN = 30  # longest axis tick label kept before it is truncated
TINY = 1e-308  # added to every evalue so an evalue of exactly 0 can be logged
BLAST_HEADER = 'qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'
# (width, height) in pixels for each static image type
PNG_SIZES = {'small_png': (1024, 768), 'large_png': (1920, 1200)}


def parse_args(argv=None):
    """Parse the command line; argv=None means use sys.argv[1:]."""
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab', default='')
    a('--header', default=BLAST_HEADER)
    a('--htmlout', default="test_run.html")
    a('--xcol', default='')
    a('--ycol', default='')
    a('--colourcol', default='')
    a('--hovercol', default='')
    a('--title', default='test blast plot')
    a('--image_type', default='short_html')
    return parser.parse_args(argv)


def column_names(header, ncols):
    """Return the ncols column names to use for the table.

    header is a comma delimited string of names. When it is empty, or when
    its name count differs from ncols (a warning is written to stderr in
    that case), the generated names col1..coln are returned instead.
    """
    defaultcols = ['col%d' % (i + 1) for i in range(ncols)]
    if len(header.strip()) == 0:
        return defaultcols
    newcols = [name.strip() for name in header.split(',')]
    if len(newcols) == ncols:
        return newcols
    sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n' % (header, len(newcols), ncols))
    return defaultcols


def surprise(evalue):
    """Return -log10(evalue) so large values reflect statistical surprise."""
    return -math.log10(evalue + TINY)


def shorten(value):
    """Return value cut to MAXLEN characters plus '..' when its string form is longer.

    Values whose string form already fits are returned unchanged (not
    converted to str), so numeric axis values stay numeric.
    """
    text = str(value)
    if len(text) > MAXLEN:
        return text[:MAXLEN] + '..'
    return value


def load_table(path, header):
    """Read the tab separated file at path and name its columns.

    The file is read with header=None so the first data row is kept as data
    instead of being consumed as column names. If a column called 'evalue'
    exists after naming it is replaced by its -log10 transform; with the
    col1..coln fallback names no column is transformed.
    """
    df = pd.read_csv(path, sep='\t', header=None)
    df.columns = column_names(header, df.columns.size)
    if 'evalue' in df.columns:
        df['evalue'] = [surprise(x) for x in df['evalue']]
    return df


def missing_columns(df, args):
    """Return the requested column names that are not columns of df.

    xcol and ycol are always required; colourcol and hovercol are only
    checked when they are not blank.
    """
    wanted = [args.xcol, args.ycol]
    wanted += [c for c in (args.colourcol, args.hovercol) if c.strip()]
    return [c for c in wanted if c not in df.columns]


def make_figure(df, args):
    """Build the scatter figure described by args from df."""
    coloured = len(args.colourcol.strip()) > 0
    # a blank hover column means no hover name rather than a column named ''
    plotargs = dict(x=args.xcol, y=args.ycol, hover_name=args.hovercol.strip() or None)
    if coloured:
        plotargs['color'] = args.colourcol
    fig = px.scatter(df, **plotargs)
    if args.title:
        fig.update_layout(title=dict(text=args.title, font=dict(size=40)))
    for trace in fig.data:
        # long string values are shortened in the trace data itself
        trace['x'] = [shorten(v) for v in trace['x']]
        trace['y'] = [shorten(v) for v in trace['y']]
        # NOTE(review): this only runs when NO colour column is set, which is
        # unchanged from the original; it looks inverted since legend groups
        # presumably only carry text when a colour column is used - confirm.
        if not coloured:
            legend = str(trace['legendgroup'])
            if len(legend) > MAXLEN:
                trace['legendgroup'] = legend[:MAXLEN]
    return fig


def write_output(fig, image_type, outpath):
    """Write fig to outpath as short HTML, stand-alone HTML or PNG.

    Any image_type other than the two HTML types is written as PNG; unknown
    types use the large PNG size.
    """
    if image_type == "short_html":
        fig.write_html(outpath, full_html=False, include_plotlyjs='cdn')
    elif image_type == "long_html":
        fig.write_html(outpath)
    else:
        wdth, ht = PNG_SIZES.get(image_type, PNG_SIZES['large_png'])
        # written under a .png name first then copied - presumably so the
        # image format can be inferred from the file extension; confirm
        fig.write_image('plotly.png', height=ht, width=wdth)
        shutil.copyfile('plotly.png', outpath)


def main():
    """Entry point: read the table, build the plot and write the output."""
    args = parse_args()
    df = load_table(args.input_tab, args.header)
    missing = missing_columns(df, args)
    if missing:
        # exits non-zero with a readable message instead of a plotly traceback
        sys.exit('#### Column name(s) %s not found in the table columns %s' % (','.join(missing), ','.join(str(c) for c in df.columns)))
    write_output(make_figure(df, args), args.image_type, args.htmlout)


if __name__ == '__main__':
    main()
100
101
102
103 #end raw]]></configfile>
104 </configfiles>
<inputs>
  <!-- The column name parameters must use names from the "header" parameter,
       or col1...coln when the header is left empty. -->
  <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" format="tabular" multiple="false"/>
  <param name="xcol" type="text" value="qaccver" label="x axis for plot"/>
  <param name="ycol" type="text" value="bitscore" label="y axis for plot"/>
  <param name="colourcol" type="text" value="piden" label="Column containing a groupable variable for colour. Leave empty for no colouring." help="Adds a legend so choose wisely"/>
  <param name="hovercol" type="text" value="qseq" label="Column name for hover string"/>
  <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/>
  <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file. Leave empty to use col1...coln" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/>
  <param name="outputimagetype" type="select" label="Select the output format for this plot image" help="Small and large png are not interactive but best for many (&gt;10k) points. Stand-alone HTML includes 3MB of javascript. Short form HTML gets it the usual way so can be cut and paste into documents.">
    <option value="short_html">Short HTML interactive - requires network connection to download 3MB javascript</option>
    <option value="long_html">Long HTML for stand-alone viewing where network access to libraries is not available.</option>
    <option value="large_png">Large (1920x1200) png image - not interactive so hover column ignored</option>
    <option value="small_png">Small (1024x768) png image - not interactive so hover column ignored</option>
  </param>
</inputs>
<outputs>
  <!-- One plot dataset: HTML by default, switched to PNG when either png
       image type is selected. -->
  <data name="htmlout" format="html" hidden="false" label="Plotlyblast $title on $input_tab.element_identifier">
    <change_format>
      <when input="outputimagetype" value="small_png" format="png"/>
      <when input="outputimagetype" value="large_png" format="png"/>
    </change_format>
  </data>
</outputs>
<tests>
  <!-- Short HTML plot of the sample input; the output is only compared by
       size, within 5000 bytes of the stored sample. -->
  <test>
    <param name="input_tab" value="input_tab_sample"/>
    <param name="xcol" value="qaccver"/>
    <param name="ycol" value="bitscore"/>
    <param name="colourcol" value="piden"/>
    <param name="hovercol" value="qseq"/>
    <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/>
    <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
    <param name="outputimagetype" value="short_html"/>
    <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
  </test>
</tests>
141 <help><![CDATA[
142
143 This is a specialised version of a generic tabular file plotter tool plotlytabular
144
145 PNG image outputs are not interactive but best for very large numbers of data points. Hover column will be ignored.
146
147 HTML interactive plots are best for a few thousand data points at most because the hover information becomes uncontrollable with very dense points.
150
151 Using the shorter format HTML relies on internet access when viewed, and saves 3MB of javascript being embedded.
152
153 The long format is useful if potentially viewed offline.
154
155
156
157 .. class:: warningmark
158
159 NOTE: Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished.
160
161
162
163 .. class:: warningmark
164
165 NOTE: Blast evalues WILL BE TRANSFORMED using -log10(evalue), so they are scaled in a way that doesn't confuse plotly.express with the tiny values.
166
167
168
169 ----
170
171
172
173 The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information.
174
175
176
177 Assumes you want a hover display for an interactive plot to be informative
178
179
180
181 Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.
182
183
184
185 For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.
186
187 qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles
188
189
190
191 When a header is supplied, the xcol and other column names must match one of those supplied column names.
192
193 So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.
194
195
196
197 Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
198
199
200
201
202
203
204
205 ------
206
207
208 Script::
209
210 import argparse
211 import shutil
212 import sys
213 import math
214 import plotly.express as px
215 import pandas as pd
216 # Ross Lazarus July 2023
217 # based on various plotly tutorials
218 # Expects a blastn search result passed in as the first command line parameter.
219 parser = argparse.ArgumentParser()
220 a = parser.add_argument
221 a('--input_tab',default='')
222 a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
223 a('--htmlout',default="test_run.html")
224 a('--xcol',default='')
225 a('--ycol',default='')
226 a('--colourcol',default='')
227 a('--hovercol',default='')
228 a('--title',default='test blast plot')
229 a('--image_type',default='short_html')
230 args = parser.parse_args()
231 df = pd.read_csv(args.input_tab, sep='\t')
232 NCOLS = df.columns.size
233 MAXLEN = 30 # tricky way to truncate long axis tickmarks
234 defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
235 if len(args.header.strip()) > 0:
236 newcols = args.header.split(',')
237 if len(newcols) == NCOLS:
238 df.columns = newcols
239 else:
240 sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
241 df.columns = defaultcols
242 else:
243 df.columns = defaultcols
244 df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise
245 if len(args.colourcol.strip()) == 0:
246 fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
247 else:
248 fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
249 if args.title:
250 ftitle=dict(text=args.title, font=dict(size=40))
251 fig.update_layout(title=ftitle)
252 for scatter in fig.data:
253 scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
254 scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
255 if len(args.colourcol.strip()) == 0:
256 sl = str(scatter['legendgroup'])
257 if len(sl) > MAXLEN:
258 scatter['legendgroup'] = sl[:MAXLEN]
259 if args.image_type == "short_html":
260 fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
261 elif args.image_type == "long_html":
262 fig.write_html(args.htmlout)
263 elif args.image_type == "small_png":
264 ht = 768
265 wdth = 1024
266 fig.write_image('plotly.png', height=ht, width=wdth)
267 shutil.copyfile('plotly.png', args.htmlout)
268 else:
269 ht = 1200
270 wdth = 1920
271 fig.write_image('plotly.png', height=ht, width=wdth)
272 shutil.copyfile('plotly.png', args.htmlout)
273
274 ]]></help>
<citations>
  <!-- Publication to cite for this tool, given as a DOI. -->
  <citation type="doi">10.1093/bioinformatics/bts573</citation>
</citations>
278 </tool>
279