Mercurial > repos > bgruening > pandas_rolling_window
diff pandas_rolling.xml @ 0:a06f7b5c4dc7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/pandas_rolling_window commit bdbedf42854d16bb00c396045007d4baece0a869
author | bgruening |
---|---|
date | Mon, 20 May 2019 08:42:54 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pandas_rolling.xml Mon May 20 08:42:54 2019 -0400 @@ -0,0 +1,175 @@ +<tool id="pandas_rolling_window" name="Rolling window" version="0.1"> + <description>over a dataframe (e.g. for data smoothing)</description> + <requirements> + <requirement type="package" version="1.16.3">numpy</requirement> + <requirement type="package" version="1.2.1">scipy</requirement> + <requirement type="package" version="0.24.2">pandas</requirement> + </requirements> + <command> +<![CDATA[ + + cat '$pandas_script' && + python '$pandas_script' + +]]> + </command> + <configfiles> + <configfile name="pandas_script"><![CDATA[ +import argparse +import sys + +import pandas as pd + +kwargs = dict() +window_type = '$smooth_function.smooth_function_opts_selector' + +#if $smooth_function.smooth_function_opts_selector == 'gaussian': +kwargs.update({'std': $smooth_function.gaussian_std}) +#elif $smooth_function.smooth_function_opts_selector == 'general_gaussian': +kwargs = ({'power': $smooth_function.ggaussian_power, 'width': $smooth_function.ggaussian_width}) +#elif $smooth_function.smooth_function_opts_selector == 'kaiser': +kwargs.update({'beta': $smooth_function.kaiser_beta}) +#elif $smooth_function.smooth_function_opts_selector == 'slepian': +kwargs.update({'width': $smooth_function.slepian_width}) +#end if + +df = pd.read_csv('${infile}', sep='\t', index_col=None, header=None, dtype={'strand': object} ) + +#if $group_column: +df['aggregate'] = df.groupby( int($group_column)-1, sort=False )[int($value_column)-1].rolling(${window_len}, win_type=window_type, center=$centering ).${statistics}(**kwargs).reset_index(drop=True) +#else: +df['aggregate'] = df[int($value_column)-1].rolling(${window_len}, win_type=window_type, center=$centering ).${statistics}(**kwargs).reset_index(drop=True) +#end if + +df.to_csv('${outfile}', index=False, header=False, sep='\t', na_rep='0', float_format='%.2f') + ]]> </configfile> + </configfiles> + <inputs> + <param name="infile" type="data" format="tabular,bed.interval" label="Select input file in tabular or BED format"/> + <param name="group_column" type="data_column" data_ref="infile" optional="true" label="Optional column to group" + help="For example if you have a chromosome column you probably want to group each chromosome before you apply any function." /> + + <param name="value_column" type="data_column" data_ref="infile" label="Column with the value of interest" + help="" /> + + <conditional name="smooth_function"> + <param name="smooth_function_opts_selector" type="select" label="Provide a window type" + help="For more information please see https://en.wikipedia.org/wiki/Window_function"> + <option value="boxcar" selected="True">Boxcar or Dirichlet, all points are evenly weighted</option> + <option value="triang">triang</option> + <option value="blackman">blackman</option> + <option value="hamming">hamming</option> + <option value="bartlett">bartlett</option> + <option value="parzen">parzen</option> + <option value="bohman">bohman</option> + <option value="blackmanharris">blackmanharris</option> + <option value="nuttall">nuttall</option> + <option value="barthann">barthann</option> + <!--option value="kaiser">kaiser</option> + <option value="gaussian">gaussian</option> + <option value="general_gaussian">general gaussian</option> + <option value="slepian">slepian</option--> + </param> + <when value="boxcar" /> + <when value="triang" /> + <when value="blackman" /> + <when value="hamming" /> + <when value="bartlett" /> + <when value="parzen" /> + <when value="bohman" /> + <when value="blackmanharris" /> + <when value="nuttall" /> + <when value="barthann" /> + <when value="kaiser"> + <param name="kaiser_beta" type="float" value="0.1" min='0.0' label="beta" /> + </when> + <when value="gaussian"> + <param name="gaussian_std" type="float" value="0.1" min='0.0' label="std" /> + </when> + <when value="general_gaussian"> + <param name="ggaussian_power" type="integer" value="2" min='1' label="power" /> + <param name="ggaussian_width" type="integer" value="2" min="1" label="width" /> + </when> + <when value="slepian"> + <param name="slepian_width" type="integer" value="2" min="1" label="width" /> + </when> + </conditional> + + <param name="statistics" type="select" label="Provide a statistical function"> + <option value="count">Number of non-null observations (count)</option> + <option value="sum">Sum of values (sum)</option> + <option value="mean" selected="true">Mean of values (mean)</option> + <option value="median">Arithmetic median of values (median)</option> + <option value="min">Minimum (min)</option> + <option value="max">max (max)</option> + <option value="std">Bessel-corrected sample standard deviation (std)</option> + <option value="var">Unbiased variance (var)</option> + <option value="skew">Sample skewness (3rd moment)</option> + <option value="kurt">Sample kurtosis (4th moment)</option> + <option value="quantil">Sample quantile (value at %)</option> + <option value="cov">Unbiased covariance (binary) (cov)</option> + <option value="corr">Correlation (corr)</option> + </param> + + <param name="centering" type="boolean" truevalue="True" falsevalue="False" label="center smoothed values" + help="By default the labels are set to the right edge of the window. Here you can change that to the center." /> + <!-- Options for all formats.--> + <param name="window_len" type="integer" value="3" min="2" label="Window length"/> + </inputs> + <outputs> + <data name="outfile" format_source="infile" /> + </outputs> + <tests> + <test> + <param name="infile" value="1.bedgraph"/> + <param name="group_column" value="1"/> + <param name="value_column" value="5"/> + <conditional name="smooth_function"> + <param name="smooth_function_opts_selector" value="boxcar"/> + </conditional> + <param name="window_len" value="3"/> + <output name="outfile" value="1_boxcar.bedgraph"/> + </test> + <test> + <!-- None test --> + <param name="infile" value="1.bedgraph"/> + <param name="value_column" value="5"/> + <conditional name="smooth_function"> + <param name="smooth_function_opts_selector" value="boxcar"/> + </conditional> + <param name="window_len" value="3"/> + <output name="outfile" value="2_boxcar.bedgraph"/> + </test> + <test> + <param name="infile" value="1.bedgraph"/> + <param name="group_column" value="1"/> + <param name="value_column" value="5"/> + <conditional name="smooth_function"> + <param name="smooth_function_opts_selector" value="hamming"/> + </conditional> + <param name="window_len" value="3"/> + <param name="statistics" value="sum"/> + <output name="outfile" value="1_hamming.bedgraph"/> + </test> + <test> + <param name="infile" value="1.bedgraph"/> + <param name="value_column" value="5"/> + <conditional name="smooth_function"> + <param name="smooth_function_opts_selector" value="hamming"/> + </conditional> + <param name="window_len" value="3"/> + <param name="statistics" value="sum"/> + <output name="outfile" value="2_hamming.bedgraph"/> + </test> + </tests> + <help> +<![CDATA[ + +**What it does** + +Provides rolling window calculations, e.g. for smoothing values. + + +]]> + </help> +</tool>