comparison plotly_blast_tool/plotlyblast.xml @ 0:61cc57e069c0 draft

Initial commit
author fubar
date Tue, 25 Jul 2023 05:36:54 +0000
parents
children 6fbd48e9c950
comparison
equal deleted inserted replaced
-1:000000000000 0:61cc57e069c0
1 <tool name="plotlyblast" id="plotlyblast" version="3.0">
2 <!--Source in git at: https://github.com/fubar2/galaxy-->
3 <!--Created by toolfactory@galaxy.org at 25/07/2023 14:13:15 using the Galaxy Tool Factory.-->
4 <description>Plotly plot generator</description>
5 <requirements>
6 <requirement type="package" version="1.5.3">pandas</requirement>
7 <requirement type="package" version="5.9.0">plotly</requirement>
8 </requirements>
9 <stdio>
10 <exit_code range="1:" level="fatal"/>
11 </stdio>
12 <version_command><![CDATA[echo "3.0"]]></version_command>
13 <command><![CDATA[python
14 $runme
15 --input_tab
16 $input_tab
17 --htmlout
18 $htmlout
19 --xcol
20 "$xcol"
21 --ycol
22 "$ycol"
23 --colourcol
24 "$colourcol"
25 --hovercol
26 "$hovercol"
27 --title
28 "$title"
29 --header
30 "$header"]]></command>
31 <configfiles>
32 <configfile name="runme"><![CDATA[#raw
33
34 import argparse
35 import sys
36 import math
37 import plotly.express as px
38 import pandas as pd
# Ross Lazarus July 2023
# based on various plotly tutorials
# Expects a blastn tabular search result supplied with --input_tab.

# Axis tick labels longer than this many characters are truncated.
MAXLEN = 40
# Column names of the 25 column Galaxy blast tabular output.
DEFAULT_HEADER = 'qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'


def resolve_columns(header, ncols):
    """Return the column names to use for a table with ncols columns.

    header is a comma delimited string of names. It is used only when it
    holds exactly ncols names; an empty header, or one with the wrong
    number of names, gives the generated names col1,...,coln. A mismatch is
    reported on stderr.
    """
    defaultcols = ['col%d' % (i + 1) for i in range(ncols)]
    if not header.strip():
        return defaultcols
    newcols = header.split(',')
    if len(newcols) == ncols:
        return newcols
    # Newline added so the warning does not run into later stderr output.
    sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n' % (header, len(newcols), ncols))
    return defaultcols


def neg_log10(value):
    """Return -log10(value) so large results reflect statistical surprise.

    BLAST reports an e-value of exactly 0.0 for very strong hits, which has
    no logarithm. Zero (or anything below it) is replaced by the smallest
    positive normal float so those hits plot at a large finite value
    instead of stopping the run with a math domain error.
    """
    value = float(value)
    if value <= 0.0:
        value = sys.float_info.min
    return -math.log10(value)


def truncate_labels(values, maxlen=MAXLEN):
    """Return values with any whose text is longer than maxlen cut short.

    Cut values become strings of maxlen characters followed by '..';
    everything else is returned unchanged, so numeric axes stay numeric.
    """
    return [str(v)[:maxlen] + '..' if len(str(v)) > maxlen else v for v in values]


def main():
    """Parse the command line, read the table and write the plot as html."""
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab', default='')
    a('--header', default=DEFAULT_HEADER)
    a('--htmlout', default="test_run.html")
    a('--xcol', default='gaps')
    a('--ycol', default='qseq')
    a('--colourcol', default='qaccver')
    a('--hovercol', default='qseq')
    a('--title', default='test blast plot of mismatch by bitscore coloured by qaccver ')
    args = parser.parse_args()
    # Column names always come from --header or are generated, so the first
    # line of the file is read as data. Without header=None pandas would
    # consume the first blast hit as column names and it would never be
    # plotted.
    # NOTE(review): this assumes the input has no header row, as for Galaxy
    # blast tabular output - confirm no caller supplies one.
    df = pd.read_csv(args.input_tab, sep='\t', header=None)
    df.columns = resolve_columns(args.header, df.columns.size)
    # Only convert when the column exists: with generated col1,...,coln
    # names there is no 'evalue' column to look up.
    if 'evalue' in df.columns:
        df['evalue'] = [neg_log10(x) for x in df['evalue']]
    plot_args = dict(x=args.xcol, y=args.ycol, hover_name=args.hovercol)
    if args.colourcol.strip():
        # An empty colour column means no grouping and no legend.
        plot_args['color'] = args.colourcol
    fig = px.scatter(df, **plot_args)
    if args.title:
        ftitle = dict(text=args.title, font=dict(size=50))
        fig.update_layout(title=ftitle)
    # Shorten long values in place so long strings do not squash the plot.
    for scatter in fig.data:
        scatter['x'] = truncate_labels(scatter['x'])
        scatter['y'] = truncate_labels(scatter['y'])
    fig.write_html(args.htmlout)


if __name__ == '__main__':
    main()

78
79 #end raw]]></configfile>
80 </configfiles>
81 <inputs>
82 <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
83 <param name="xcol" type="text" value="mismatch" label="x axis for plot" help=""/>
84 <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
85 <param name="colourcol" type="text" value="qaccver" label="Column containing a groupable variable for colour. Leave empty for no colour grouping." help="Adds a legend so choose wisely "/>
86 <param name="hovercol" type="text" value="qseq" label="column name for hover string" help=""/>
87 <param name="title" type="text" value="Test title default" label="Title for the plot" help=""/>
88 <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file's columns" help="Default for Galaxy blast outputs with 25 columns."/>
89 </inputs>
90 <outputs>
91 <data name="htmlout" format="html" label="htmlout" hidden="false"/>
92 </outputs>
93 <tests>
94 <test>
95 <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
96 <param name="input_tab" value="input_tab_sample"/>
97 <param name="xcol" value="mismatch"/>
98 <param name="ycol" value="bitscore"/>
99 <param name="colourcol" value="qaccver"/>
100 <param name="hovercol" value="qseq"/>
101 <param name="title" value="Test title default"/>
102 <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
103 </test>
104 </tests>
105 <help><![CDATA[
106
107 This is a version of the generic tabular file plotting tool, plotlytabular, specialised for BLAST tabular output.
108
109
110
111 .. class:: warningmark
112
113
114
115 Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished.
116
117 The evalues WILL BE CONVERTED as -log10(evalue) so they are scaled in a way that doesn't confuse plotly.express with the tiny values.
118
119
120
121 ----
122
123
124
125 The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information.
126
127
128
129 Assumes you want a hover display for an interactive plot to be informative
130
131
132
133 Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.
134
135
136
137 For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.
138
139 qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles
140
141
142
143 When a header is supplied, the xcol and other column names must match one of those supplied column names.
144
145 So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.
146
147
148
149 Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
150
151
152
153 ------
154
155
156 Script::
157
158 import argparse
159 import sys
160 import math
161 import plotly.express as px
162 import pandas as pd
163 # Ross Lazarus July 2023
164 # based on various plotly tutorials
165 # Expects a blastn tabular search result passed in with the --input_tab command line parameter.
166 parser = argparse.ArgumentParser()
167 a = parser.add_argument
168 a('--input_tab',default='')
169 a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
170 a('--htmlout',default="test_run.html")
171 a('--xcol',default='gaps')
172 a('--ycol',default='qseq')
173 a('--colourcol',default='qaccver')
174 a('--hovercol',default='qseq')
175 a('--title',default='test blast plot of mismatch by bitscore coloured by qaccver ')
176 args = parser.parse_args()
177 df = pd.read_csv(args.input_tab, sep='\t')
178 NCOLS = df.columns.size
179 MAXLEN = 40 # tricky way to truncate long axis tickmarks
180 defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
181 if len(args.header.strip()) > 0:
182 newcols = args.header.split(',')
183 if len(newcols) == NCOLS:
184 df.columns = newcols
185 else:
186 sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
187 df.columns = defaultcols
188 else:
189 df.columns = defaultcols
190 df['evalue'] = [-math.log(x) for x in df['evalue']] # convert so large values reflect statistical surprise
191 if len(args.colourcol.strip()) == 0:
192 fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
193 else:
194 fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
195 if args.title:
196 ftitle=dict(text=args.title, font=dict(size=50), automargin=True)
197 fig.update_layout(title=ftitle)
198 for scatter in fig.data:
199 scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
200 scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
201 fig.write_html(args.htmlout)
202 fig.show()
203
204 ]]></help>
205 <citations>
206 <citation type="doi">10.1093/bioinformatics/bts573</citation>
207 </citations>
208 </tool>
209