0
|
1 <tool name="plotlyblast" id="plotlyblast" version="3.0">
|
|
2 <!--Source in git at: https://github.com/fubar2/galaxy-->
|
|
3 <!--Created by toolfactory@galaxy.org at 25/07/2023 14:13:15 using the Galaxy Tool Factory.-->
|
|
4 <description>Plotly plot generator</description>
|
|
5 <requirements>
|
|
6 <requirement type="package" version="1.5.3">pandas</requirement>
|
|
7 <requirement type="package" version="5.9.0">plotly</requirement>
|
|
8 </requirements>
|
|
9 <stdio>
|
|
10 <exit_code range="1:" level="fatal"/>
|
|
11 </stdio>
|
|
12 <version_command><![CDATA[echo "3.0"]]></version_command>
|
|
13 <command><![CDATA[python
|
|
14 $runme
|
|
15 --input_tab
|
|
16 $input_tab
|
|
17 --htmlout
|
|
18 $htmlout
|
|
19 --xcol
|
|
20 "$xcol"
|
|
21 --ycol
|
|
22 "$ycol"
|
|
23 --colourcol
|
|
24 "$colourcol"
|
|
25 --hovercol
|
|
26 "$hovercol"
|
|
27 --title
|
|
28 "$title"
|
|
29 --header
|
|
30 "$header"]]></command>
|
|
31 <configfiles>
|
|
32 <configfile name="runme"><![CDATA[#raw
|
|
33
|
|
34 import argparse
|
|
35 import sys
|
|
36 import math
|
|
37 import plotly.express as px
|
|
38 import pandas as pd
|
|
# Ross Lazarus July 2023
# based on various plotly tutorials
# Expects a tabular blastn search result passed in with --input_tab and
# writes an interactive plotly scatter plot to the --htmlout html file.

MAXLEN = 40  # axis tick labels longer than this are truncated so plots are not squished


def neg_log10(evalue):
    """Return -log10(evalue) so large values reflect statistical surprise.

    BLAST reports vanishingly small e-values as exactly 0.0, where the
    logarithm is undefined, so zero is clamped to the smallest positive
    float before conversion instead of raising a math domain error.
    """
    return -math.log10(max(float(evalue), sys.float_info.min))


def truncate_label(value, maxlen=MAXLEN):
    """Return value unchanged unless its string form is longer than maxlen.

    Longer values are returned as their first maxlen characters with '..'
    appended to show that truncation happened.
    """
    text = str(value)
    if len(text) > maxlen:
        return text[:maxlen] + '..'
    return value


def column_names(header, ncols):
    """Return the ncols column names to use for the input table.

    header is a comma delimited string of names. When it is empty, or does
    not supply exactly ncols names (a warning is written to stderr in that
    case), the generated names col1,...coln are returned instead.
    """
    defaultcols = ['col%d' % (x + 1) for x in range(ncols)]
    if len(header.strip()) == 0:
        return defaultcols
    newcols = header.split(',')
    if len(newcols) == ncols:
        return newcols
    sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (header, len(newcols), ncols))
    return defaultcols


def main():
    """Parse the command line, read the tabular input and write the html plot."""
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab', default='')
    a('--header', default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
    a('--htmlout', default="test_run.html")
    a('--xcol', default='gaps')
    a('--ycol', default='qseq')
    a('--colourcol', default='qaccver')
    a('--hovercol', default='qseq')
    a('--title', default='test blast plot of mismatch by bitscore coloured by qaccver ')
    args = parser.parse_args()
    # NOTE(review): read_csv infers a header from the first row of the file and
    # those names are replaced below, so a file without a header line loses its
    # first record - confirm whether header=None is wanted for blast output.
    df = pd.read_csv(args.input_tab, sep='\t')
    df.columns = column_names(args.header, df.columns.size)
    # Only convert when the column exists: with the col1,...coln fallback names
    # there is no 'evalue' column and an unconditional lookup raises KeyError.
    if 'evalue' in df.columns:
        df['evalue'] = [neg_log10(x) for x in df['evalue']]
    plot_args = dict(x=args.xcol, y=args.ycol, hover_name=args.hovercol)
    if len(args.colourcol.strip()) > 0:
        # an empty colour column means an uncoloured plot without a legend
        plot_args['color'] = args.colourcol
    fig = px.scatter(df, **plot_args)
    if args.title:
        ftitle = dict(text=args.title, font=dict(size=50))
        fig.update_layout(title=ftitle)
    # tricky way to truncate long axis tickmarks: rewrite the plotted values
    for scatter in fig.data:
        scatter['x'] = [truncate_label(x) for x in scatter['x']]
        scatter['y'] = [truncate_label(y) for y in scatter['y']]
    fig.write_html(args.htmlout)


if __name__ == '__main__':
    main()
|
|
78
|
|
79 #end raw]]></configfile>
|
|
80 </configfiles>
|
|
81 <inputs>
|
|
82 <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
|
|
83 <param name="xcol" type="text" value="mismatch" label="x axis for plot" help=""/>
|
|
84 <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
|
|
85 <param name="colourcol" type="text" value="qaccver" label="Column containing a groupable variable for colour. Leave empty for no colouring." help="Adds a legend so choose wisely "/>
|
|
86 <param name="hovercol" type="text" value="qseq" label="column name for hover string" help=""/>
|
|
87 <param name="title" type="text" value="Test title default" label="Title for the plot" help=""/>
|
|
88 <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file's columns" help="Default for Galaxy blast outputs with 25 columns."/>
|
|
89 </inputs>
|
|
90 <outputs>
|
|
91 <data name="htmlout" format="html" label="htmlout" hidden="false"/>
|
|
92 </outputs>
|
|
93 <tests>
|
|
94 <test>
|
|
95 <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
|
|
96 <param name="input_tab" value="input_tab_sample"/>
|
|
97 <param name="xcol" value="mismatch"/>
|
|
98 <param name="ycol" value="bitscore"/>
|
|
99 <param name="colourcol" value="qaccver"/>
|
|
100 <param name="hovercol" value="qseq"/>
|
|
101 <param name="title" value="Test title default"/>
|
|
102 <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
|
|
103 </test>
|
|
104 </tests>
|
|
105 <help><![CDATA[
|
|
106
|
|
107 This is a specialised version of a generic tabular file plotter tool plotlytabular
|
|
108
|
|
109
|
|
110
|
|
111 .. class:: warningmark
|
|
112
|
|
113
|
|
114
|
|
115 Tick labels on the x and y axes that are longer than 40 characters WILL BE TRUNCATED - ".." is added to indicate truncation - otherwise some plots are squished.
|
|
116
|
|
117 The evalues WILL BE CONVERTED as -log10(evalue) so they are scaled in a way that doesn't confuse plotly.express with the tiny values.
|
|
118
|
|
119
|
|
120
|
|
121 ----
|
|
122
|
|
123
|
|
124
|
|
125 The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information.
|
|
126
|
|
127
|
|
128
|
|
129 Assumes you want a hover display for an interactive plot to be informative
|
|
130
|
|
131
|
|
132
|
|
133 Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.
|
|
134
|
|
135
|
|
136
|
|
137 For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.
|
|
138
|
|
139 qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles
|
|
140
|
|
141
|
|
142
|
|
143 When a header is supplied, the xcol and other column names must match one of those supplied column names.
|
|
144
|
|
145 So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.
|
|
146
|
|
147
|
|
148
|
|
149 Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
|
|
150
|
|
151
|
|
152
|
|
153 ------
|
|
154
|
|
155
|
|
156 Script::
|
|
157
|
|
158 import argparse
|
|
159 import sys
|
|
160 import math
|
|
161 import plotly.express as px
|
|
162 import pandas as pd
|
|
163 # Ross Lazarus July 2023
|
|
164 # based on various plotly tutorials
|
|
165 # Expects a tabular blastn search result passed in with the --input_tab command line parameter.
|
|
166 parser = argparse.ArgumentParser()
|
|
167 a = parser.add_argument
|
|
168 a('--input_tab',default='')
|
|
169 a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
|
|
170 a('--htmlout',default="test_run.html")
|
|
171 a('--xcol',default='gaps')
|
|
172 a('--ycol',default='qseq')
|
|
173 a('--colourcol',default='qaccver')
|
|
174 a('--hovercol',default='qseq')
|
|
175 a('--title',default='test blast plot of mismatch by bitscore coloured by qaccver ')
|
|
176 args = parser.parse_args()
|
|
177 df = pd.read_csv(args.input_tab, sep='\t')
|
|
178 NCOLS = df.columns.size
|
|
179 MAXLEN = 40 # tricky way to truncate long axis tickmarks
|
|
180 defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
|
|
181 if len(args.header.strip()) > 0:
|
|
182 newcols = args.header.split(',')
|
|
183 if len(newcols) == NCOLS:
|
|
184 df.columns = newcols
|
|
185 else:
|
|
186 sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
|
|
187 df.columns = defaultcols
|
|
188 else:
|
|
189 df.columns = defaultcols
|
|
190 df['evalue'] = [-math.log(x) for x in df['evalue']] # convert so large values reflect statistical surprise
|
|
191 if len(args.colourcol.strip()) == 0:
|
|
192 fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
|
|
193 else:
|
|
194 fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
|
|
195 if args.title:
|
|
196 ftitle=dict(text=args.title, font=dict(size=50), automargin=True)
|
|
197 fig.update_layout(title=ftitle)
|
|
198 for scatter in fig.data:
|
|
199 scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
|
|
200 scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
|
|
201 fig.write_html(args.htmlout)
|
|
202 fig.show()
|
|
203
|
|
204 ]]></help>
|
|
205 <citations>
|
|
206 <citation type="doi">10.1093/bioinformatics/bts573</citation>
|
|
207 </citations>
|
|
208 </tool>
|
|
209
|