Mercurial > repos > fubar > blasttools_search_test
comparison plotly_blast_tool/plotlyblast.xml @ 7:bb99f2c0e358 draft
Updated by regenerating with latest ToolFactory revision
| author | fubar |
|---|---|
| date | Fri, 04 Aug 2023 01:52:13 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 6:c23d0b047de9 | 7:bb99f2c0e358 |
|---|---|
| 1 <tool name="plotlyblast" id="plotlyblast" version="3.0"> | |
| 2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> | |
| 3 <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:02 using the Galaxy Tool Factory.--> | |
| 4 <description>Plotly plot generator specialised for 25 column Galaxy blast search result tabular files</description> | |
| 5 <requirements> | |
| 6 <requirement version="1.5.3" type="package">pandas</requirement> | |
| 7 <requirement version="5.9.0" type="package">plotly</requirement> | |
| 8 <requirement version="0.2.1" type="package">python-kaleido</requirement> | |
| 9 </requirements> | |
| 10 <stdio> | |
| 11 <exit_code range="1:" level="fatal"/> | |
| 12 </stdio> | |
| 13 <version_command><![CDATA[echo "3.0"]]></version_command> | |
| 14 <command><![CDATA[python | |
| 15 $runme | |
| 16 --input_tab | |
| 17 $input_tab | |
| 18 --htmlout | |
| 19 $htmlout | |
| 20 --xcol | |
| 21 "$xcol" | |
| 22 --ycol | |
| 23 "$ycol" | |
| 24 --colourcol | |
| 25 "$colourcol" | |
| 26 --hovercol | |
| 27 "$hovercol" | |
| 28 --title | |
| 29 "$title" | |
| 30 --header | |
| 31 "$header" | |
| 32 --image_type | |
| 33 "$outputimagetype"]]></command> | |
| 34 <configfiles> | |
| 35 <configfile name="runme"><![CDATA[#raw | |
| 36 | |
"""Draw a plotly scatter plot from a tabular BLAST search result.

The tab-separated file given by --input_tab is loaded into a DataFrame, its
columns are named from the comma-delimited --header (or col1..coln), and a
scatter plot of --xcol against --ycol is written to --htmlout as HTML or as
a PNG image, depending on --image_type.
"""
import argparse
import math
import shutil
import sys

import pandas as pd
import plotly.express as px

# Ross Lazarus July 2023
# based on various plotly tutorials
# Expects a blast search result supplied with the --input_tab option.

MAXLEN = 30  # x/y values whose text is longer than this are cut and '..' appended


def _truncate(values):
    """Return values as a list, shortening any whose str() exceeds MAXLEN."""
    return [str(v)[:MAXLEN] + '..' if len(str(v)) > MAXLEN else v for v in values]


parser = argparse.ArgumentParser()
a = parser.add_argument
a('--input_tab', default='')
a('--header', default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
a('--htmlout', default="test_run.html")
a('--xcol', default='')
a('--ycol', default='')
a('--colourcol', default='')
a('--hovercol', default='')
a('--title', default='test blast plot')
a('--image_type', default='short_html')
args = parser.parse_args()

# NOTE(review): read_csv is called with its default header handling, so the
# first line of the file becomes the column names, which are then overwritten
# below. If the input has no header row, its first record never reaches the
# plot - confirm whether header=None is wanted here.
df = pd.read_csv(args.input_tab, sep='\t')
NCOLS = df.columns.size
defaultcols = ['col%d' % (x + 1) for x in range(NCOLS)]
if args.header.strip():
    newcols = args.header.split(',')
    if len(newcols) == NCOLS:
        df.columns = newcols
    else:
        # Fall back to generated names rather than failing on a bad header.
        sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n' % (args.header, len(newcols), NCOLS))
        df.columns = defaultcols
else:
    df.columns = defaultcols

# Only transform when an 'evalue' column exists: with col1..coln names or a
# custom header there is none, and indexing it would raise KeyError.
if 'evalue' in df.columns:
    # Negative natural log, so large values reflect statistical surprise;
    # 1e-308 keeps an evalue of exactly 0 from reaching math.log(0).
    df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']]

scatter_kwargs = dict(x=args.xcol, y=args.ycol, hover_name=args.hovercol)
if args.colourcol.strip():
    scatter_kwargs['color'] = args.colourcol
fig = px.scatter(df, **scatter_kwargs)

if args.title:
    fig.update_layout(title=dict(text=args.title, font=dict(size=40)))

for scatter in fig.data:
    scatter['x'] = _truncate(scatter['x'])
    scatter['y'] = _truncate(scatter['y'])
    # NOTE(review): this runs only when NO colour column was given; the
    # condition looks inverted - confirm before changing.
    if not args.colourcol.strip():
        sl = str(scatter['legendgroup'])
        if len(sl) > MAXLEN:
            scatter['legendgroup'] = sl[:MAXLEN]

if args.image_type == "short_html":
    # HTML fragment that loads plotly.js from the CDN when viewed.
    fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
elif args.image_type == "long_html":
    fig.write_html(args.htmlout)
else:
    # "small_png" is 1024x768; any other value is rendered as the large PNG.
    if args.image_type == "small_png":
        wdth, ht = 1024, 768
    else:
        wdth, ht = 1920, 1200
    # Rendered to a .png file name first and then copied to the output path;
    # presumably so the image format can be taken from the extension.
    fig.write_image('plotly.png', height=ht, width=wdth)
    shutil.copyfile('plotly.png', args.htmlout)
| 100 | |
| 101 | |
| 102 | |
| 103 #end raw]]></configfile> | |
| 104 </configfiles> | |
| 105 <inputs> | |
| 106 <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/> | |
| 107 <param name="xcol" type="text" value="qaccver" label="x axis for plot" help=""/> | |
| 108 <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/> | |
| 109 <param name="colourcol" type="text" value="piden" label="Column containing a groupable variable for colour. Leave empty for no colouring." help="Adds a legend so choose wisely"/> | |
| 110 <param name="hovercol" type="text" value="qseq" label="Column name for hover string" help=""/> | |
| 111 <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/> | |
| 112 <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file. Default is None when col1...coln will be used" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/> | |
| 113 <param name="outputimagetype" type="select" label="Select the output format for this plot image" help="Small and large png are not interactive but best for many (more than 10k) points. Stand-alone HTML includes 3MB of javascript. Short form HTML gets it the usual way so can be cut and pasted into documents."> | |
| 114 <option value="short_html">Short HTML interactive - requires network connection to download 3MB javascript</option> | |
| 115 <option value="long_html">Long HTML for stand-alone viewing where network access to libraries is not available.</option> | |
| 116 <option value="large_png">Large (1920x1200) png image - not interactive so hover column ignored</option> | |
| 117 <option value="small_png">Small (1024x768) png image - not interactive so hover column ignored</option> | |
| 118 </param> | |
| 119 </inputs> | |
| 120 <outputs> | |
| 121 <data name="htmlout" format="html" label="Plotlyblast $title on $input_tab.element_identifier" hidden="false"> | |
| 122 <change_format> | |
| 123 <when input="outputimagetype" format="png" value="small_png"/> | |
| 124 <when input="outputimagetype" format="png" value="large_png"/> | |
| 125 </change_format> | |
| 126 </data> | |
| 127 </outputs> | |
| 128 <tests> | |
| 129 <test> | |
| 130 <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/> | |
| 131 <param name="input_tab" value="input_tab_sample"/> | |
| 132 <param name="xcol" value="qaccver"/> | |
| 133 <param name="ycol" value="bitscore"/> | |
| 134 <param name="colourcol" value="piden"/> | |
| 135 <param name="hovercol" value="qseq"/> | |
| 136 <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/> | |
| 137 <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/> | |
| 138 <param name="outputimagetype" value="short_html"/> | |
| 139 </test> | |
| 140 </tests> | |
| 141 <help><![CDATA[ | |
| 142 | |
| 143 This is a specialised version of the generic tabular file plotter tool, plotlytabular. | |
| 144 | |
| 145 PNG image outputs are not interactive but best for very large numbers of data points. Hover column will be ignored. | |
| 146 | |
| 147 HTML interactive plots are best for a few thousand data points at most because | |
| 148 | |
| 149 the hover information becomes uncontrollable with very dense points. | |
| 150 | |
| 151 Using the shorter format HTML relies on internet access when viewed, and saves 3MB of javascript being embedded. | |
| 152 | |
| 153 The long format is useful if potentially viewed offline. | |
| 154 | |
| 155 | |
| 156 | |
| 157 .. class:: warningmark | |
| 158 | |
| 159 NOTE: Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished. | |
| 160 | |
| 161 | |
| 162 | |
| 163 .. class:: warningmark | |
| 164 | |
| 165 NOTE: Blast evalues WILL BE TRANSFORMED using -log(evalue) (natural logarithm), so that larger values indicate more significant hits and the tiny raw values do not confuse plotly.express. | |
| 166 | |
| 167 | |
| 168 | |
| 169 ---- | |
| 170 | |
| 171 | |
| 172 | |
| 173 The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information. | |
| 174 | |
| 175 | |
| 176 | |
| 177 Assumes you want a hover display for an interactive plot to be informative | |
| 178 | |
| 179 | |
| 180 | |
| 181 Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter. | |
| 182 | |
| 183 | |
| 184 | |
| 185 For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column. | |
| 186 | |
| 187 qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles | |
| 188 | |
| 189 | |
| 190 | |
| 191 When a header is supplied, the xcol and other column names must match one of those supplied column names. | |
| 192 | |
| 193 So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied. | |
| 194 | |
| 195 | |
| 196 | |
| 197 Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt | |
| 198 | |
| 199 | |
| 200 | |
| 201 | |
| 202 | |
| 203 | |
| 204 | |
| 205 ------ | |
| 206 | |
| 207 | |
| 208 Script:: | |
| 209 | |
"""Draw a plotly scatter plot from a tabular BLAST search result.

The tab-separated file given by --input_tab is loaded into a DataFrame, its
columns are named from the comma-delimited --header (or col1..coln), and a
scatter plot of --xcol against --ycol is written to --htmlout as HTML or as
a PNG image, depending on --image_type.
"""
import argparse
import math
import shutil
import sys

import pandas as pd
import plotly.express as px

# Ross Lazarus July 2023
# based on various plotly tutorials
# Expects a blast search result supplied with the --input_tab option.

MAXLEN = 30  # x/y values whose text is longer than this are cut and '..' appended


def _truncate(values):
    """Return values as a list, shortening any whose str() exceeds MAXLEN."""
    return [str(v)[:MAXLEN] + '..' if len(str(v)) > MAXLEN else v for v in values]


parser = argparse.ArgumentParser()
a = parser.add_argument
a('--input_tab', default='')
a('--header', default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
a('--htmlout', default="test_run.html")
a('--xcol', default='')
a('--ycol', default='')
a('--colourcol', default='')
a('--hovercol', default='')
a('--title', default='test blast plot')
a('--image_type', default='short_html')
args = parser.parse_args()

# NOTE(review): read_csv is called with its default header handling, so the
# first line of the file becomes the column names, which are then overwritten
# below. If the input has no header row, its first record never reaches the
# plot - confirm whether header=None is wanted here.
df = pd.read_csv(args.input_tab, sep='\t')
NCOLS = df.columns.size
defaultcols = ['col%d' % (x + 1) for x in range(NCOLS)]
if args.header.strip():
    newcols = args.header.split(',')
    if len(newcols) == NCOLS:
        df.columns = newcols
    else:
        # Fall back to generated names rather than failing on a bad header.
        sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n' % (args.header, len(newcols), NCOLS))
        df.columns = defaultcols
else:
    df.columns = defaultcols

# Only transform when an 'evalue' column exists: with col1..coln names or a
# custom header there is none, and indexing it would raise KeyError.
if 'evalue' in df.columns:
    # Negative natural log, so large values reflect statistical surprise;
    # 1e-308 keeps an evalue of exactly 0 from reaching math.log(0).
    df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']]

scatter_kwargs = dict(x=args.xcol, y=args.ycol, hover_name=args.hovercol)
if args.colourcol.strip():
    scatter_kwargs['color'] = args.colourcol
fig = px.scatter(df, **scatter_kwargs)

if args.title:
    fig.update_layout(title=dict(text=args.title, font=dict(size=40)))

for scatter in fig.data:
    scatter['x'] = _truncate(scatter['x'])
    scatter['y'] = _truncate(scatter['y'])
    # NOTE(review): this runs only when NO colour column was given; the
    # condition looks inverted - confirm before changing.
    if not args.colourcol.strip():
        sl = str(scatter['legendgroup'])
        if len(sl) > MAXLEN:
            scatter['legendgroup'] = sl[:MAXLEN]

if args.image_type == "short_html":
    # HTML fragment that loads plotly.js from the CDN when viewed.
    fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
elif args.image_type == "long_html":
    fig.write_html(args.htmlout)
else:
    # "small_png" is 1024x768; any other value is rendered as the large PNG.
    if args.image_type == "small_png":
        wdth, ht = 1024, 768
    else:
        wdth, ht = 1920, 1200
    # Rendered to a .png file name first and then copied to the output path;
    # presumably so the image format can be taken from the extension.
    fig.write_image('plotly.png', height=ht, width=wdth)
    shutil.copyfile('plotly.png', args.htmlout)
| 273 | |
| 274 ]]></help> | |
| 275 <citations> | |
| 276 <citation type="doi">10.1093/bioinformatics/bts573</citation> | |
| 277 </citations> | |
| 278 </tool> | |
| 279 |
