Mercurial > repos > fubar > plotly_blast_plot
diff plotly_blast_tool/plotlyblast.xml @ 0:61cc57e069c0 draft
Initial commit
author | fubar |
---|---|
date | Tue, 25 Jul 2023 05:36:54 +0000 |
parents | |
children | 6fbd48e9c950 |
line wrap: on
line diff
<tool name="plotlyblast" id="plotlyblast" version="3.0">
  <!--Source in git at: https://github.com/fubar2/galaxy-->
  <!--Created by toolfactory@galaxy.org at 25/07/2023 14:13:15 using the Galaxy Tool Factory.-->
  <!--
    Galaxy wrapper for an interactive plotly scatter plot of a blastn tabular search result.
    The configfile below is the complete plotting script; the same script is reproduced in the
    help section, so any change must be made in both places.
  -->
  <description>Plotly plot generator</description>
  <requirements>
    <requirement type="package" version="1.5.3">pandas</requirement>
    <requirement type="package" version="5.9.0">plotly</requirement>
  </requirements>
  <stdio>
    <!-- Only a non-zero exit code fails the job; warnings written to stderr do not. -->
    <exit_code range="1:" level="fatal"/>
  </stdio>
  <version_command><![CDATA[echo "3.0"]]></version_command>
  <!-- Every substituted value is single quoted so spaces and shell metacharacters stay inside one argument. -->
  <command><![CDATA[python
'$runme'
--input_tab
'$input_tab'
--htmlout
'$htmlout'
--xcol
'$xcol'
--ycol
'$ycol'
--colourcol
'$colourcol'
--hovercol
'$hovercol'
--title
'$title'
--header
'$header']]></command>
  <configfiles>
    <configfile name="runme"><![CDATA[#raw

import argparse
import math
import sys

import pandas as pd
import plotly.express as px

# Ross Lazarus July 2023
# based on various plotly tutorials
# Plots two columns of a headerless tabular blastn search result (--input_tab)
# as an interactive plotly scatter plot written to --htmlout.

# Values longer than this are truncated so long axis tick labels cannot squash the plot.
MAXLEN = 40
# Blast reports 0.0 for its strongest hits and log10(0) is undefined, so clamp before taking logs.
MIN_EVALUE = sys.float_info.min
DEFAULT_HEADER = 'qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'


def parse_args():
    """Return the parsed command line options."""
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab', default='')
    a('--header', default=DEFAULT_HEADER)
    a('--htmlout', default="test_run.html")
    a('--xcol', default='gaps')
    a('--ycol', default='qseq')
    a('--colourcol', default='qaccver')
    a('--hovercol', default='qseq')
    a('--title', default='test blast plot of mismatch by bitscore coloured by qaccver ')
    return parser.parse_args()


def clip(value):
    """Return value unchanged unless its string form is longer than MAXLEN characters."""
    text = str(value)
    if len(text) > MAXLEN:
        return text[:MAXLEN] + '..'
    return value


def surprise(evalue):
    """Return -log10(evalue) so large values reflect statistical surprise; an evalue of 0.0 stays finite."""
    return -math.log10(max(evalue, MIN_EVALUE))


def main():
    """Read the tabular hits, name the columns, draw the scatter plot and write it as html."""
    args = parse_args()
    # header=None because blast tabular output has no header row - without it the first hit is lost.
    df = pd.read_csv(args.input_tab, sep='\t', header=None)
    ncols = df.columns.size
    defaultcols = ['col%d' % (i + 1) for i in range(ncols)]
    if args.header.strip():
        newcols = [name.strip() for name in args.header.split(',')]
        if len(newcols) == ncols:
            df.columns = newcols
        else:
            sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n' % (args.header, len(newcols), ncols))
            df.columns = defaultcols
    else:
        df.columns = defaultcols
    # The evalue column only exists when the supplied header names it.
    if 'evalue' in df.columns:
        df['evalue'] = df['evalue'].map(surprise)
    plot_args = {'x': args.xcol, 'y': args.ycol}
    if args.hovercol.strip():
        plot_args['hover_name'] = args.hovercol
    if args.colourcol.strip():
        plot_args['color'] = args.colourcol
    missing = [name for name in plot_args.values() if name not in df.columns]
    if missing:
        # Exits non-zero with a readable message instead of a plotly traceback.
        sys.exit('#### Column name(s) %s not found - available column names are %s' % (','.join(missing), ','.join(str(c) for c in df.columns)))
    fig = px.scatter(df, **plot_args)
    if args.title:
        fig.update_layout(title=dict(text=args.title, font=dict(size=50)))
    for trace in fig.data:
        trace['x'] = [clip(v) for v in trace['x']]
        trace['y'] = [clip(v) for v in trace['y']]
    fig.write_html(args.htmlout)


if __name__ == '__main__':
    main()

#end raw]]></configfile>
  </configfiles>
  <inputs>
    <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
    <param name="xcol" type="text" value="mismatch" label="x axis for plot" help=""/>
    <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
    <param name="colourcol" type="text" value="qaccver" label="column containing a groupable variable for colour. Default none." help="Adds a legend so choose wisely"/>
    <param name="hovercol" type="text" value="qseq" label="column name for hover string" help=""/>
    <param name="title" type="text" value="Test title default" label="Title for the plot" help=""/>
    <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file's columns" help="Default for Galaxy blast outputs with 25 columns."/>
  </inputs>
  <outputs>
    <data name="htmlout" format="html" label="htmlout" hidden="false"/>
  </outputs>
  <tests>
    <test>
      <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
      <param name="input_tab" value="input_tab_sample"/>
      <param name="xcol" value="mismatch"/>
      <param name="ycol" value="bitscore"/>
      <param name="colourcol" value="qaccver"/>
      <param name="hovercol" value="qseq"/>
      <param name="title" value="Test title default"/>
      <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
    </test>
  </tests>
  <help><![CDATA[

This is a specialised version of the generic tabular file plotter tool plotlytabular.



.. class:: warningmark



Values on the x and y axes that are longer than 40 characters WILL BE TRUNCATED - ".." is added to indicate truncation - otherwise some plots are squished.

The evalues WILL BE CONVERTED to -log10(evalue) so they are scaled in a way that doesn't confuse plotly.express with the tiny values. An evalue of 0 is replaced by the smallest positive floating point number before conversion, so it plots as about 308.



----



The main reason to run this tool is to specify an interactive hover text, so that useful information appears when hovering over each data point.



The input is assumed to have no header row, so every line is plotted as a hit.

Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.

If the number of supplied names does not match the number of columns in the input, a warning is written and col1,...coln are used instead.



For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.

    qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles



When a header is supplied, the xcol and other column names must match one of those supplied column names.

So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.

A column name that cannot be found stops the tool with a message listing the available column names.



Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt



------


Script::

    import argparse
    import math
    import sys

    import pandas as pd
    import plotly.express as px

    # Ross Lazarus July 2023
    # based on various plotly tutorials
    # Plots two columns of a headerless tabular blastn search result (--input_tab)
    # as an interactive plotly scatter plot written to --htmlout.

    # Values longer than this are truncated so long axis tick labels cannot squash the plot.
    MAXLEN = 40
    # Blast reports 0.0 for its strongest hits and log10(0) is undefined, so clamp before taking logs.
    MIN_EVALUE = sys.float_info.min
    DEFAULT_HEADER = 'qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'


    def parse_args():
        """Return the parsed command line options."""
        parser = argparse.ArgumentParser()
        a = parser.add_argument
        a('--input_tab', default='')
        a('--header', default=DEFAULT_HEADER)
        a('--htmlout', default="test_run.html")
        a('--xcol', default='gaps')
        a('--ycol', default='qseq')
        a('--colourcol', default='qaccver')
        a('--hovercol', default='qseq')
        a('--title', default='test blast plot of mismatch by bitscore coloured by qaccver ')
        return parser.parse_args()


    def clip(value):
        """Return value unchanged unless its string form is longer than MAXLEN characters."""
        text = str(value)
        if len(text) > MAXLEN:
            return text[:MAXLEN] + '..'
        return value


    def surprise(evalue):
        """Return -log10(evalue) so large values reflect statistical surprise; an evalue of 0.0 stays finite."""
        return -math.log10(max(evalue, MIN_EVALUE))


    def main():
        """Read the tabular hits, name the columns, draw the scatter plot and write it as html."""
        args = parse_args()
        # header=None because blast tabular output has no header row - without it the first hit is lost.
        df = pd.read_csv(args.input_tab, sep='\t', header=None)
        ncols = df.columns.size
        defaultcols = ['col%d' % (i + 1) for i in range(ncols)]
        if args.header.strip():
            newcols = [name.strip() for name in args.header.split(',')]
            if len(newcols) == ncols:
                df.columns = newcols
            else:
                sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln\n' % (args.header, len(newcols), ncols))
                df.columns = defaultcols
        else:
            df.columns = defaultcols
        # The evalue column only exists when the supplied header names it.
        if 'evalue' in df.columns:
            df['evalue'] = df['evalue'].map(surprise)
        plot_args = {'x': args.xcol, 'y': args.ycol}
        if args.hovercol.strip():
            plot_args['hover_name'] = args.hovercol
        if args.colourcol.strip():
            plot_args['color'] = args.colourcol
        missing = [name for name in plot_args.values() if name not in df.columns]
        if missing:
            # Exits non-zero with a readable message instead of a plotly traceback.
            sys.exit('#### Column name(s) %s not found - available column names are %s' % (','.join(missing), ','.join(str(c) for c in df.columns)))
        fig = px.scatter(df, **plot_args)
        if args.title:
            fig.update_layout(title=dict(text=args.title, font=dict(size=50)))
        for trace in fig.data:
            trace['x'] = [clip(v) for v in trace['x']]
            trace['y'] = [clip(v) for v in trace['y']]
        fig.write_html(args.htmlout)


    if __name__ == '__main__':
        main()

]]></help>
  <citations>
    <citation type="doi">10.1093/bioinformatics/bts573</citation>
  </citations>
</tool>