Mercurial > repos > mvdbeek > damidseq_average_scores

<tool id="average_score" name="Calculate average scores" version="0.1.3">
    <description>for fixed step interval files</description>
    <requirements>
        <requirement type="package" version="0.22">pandas</requirement>
    </requirements>
    <!-- The job only runs the Python script rendered from the configfile below. -->
    <command detect_errors="exit_code"><![CDATA[
python '$average_script'
    ]]></command>
    <!--
        average_script is a Cheetah template that renders to a Python script.
        It reads the score column of every selected dataset, averages it row by
        row, and rewrites the first dataset with the averaged score.
        The script is kept in CDATA so that comparison operators never need
        XML escaping; its lines must stay at column 0 because they are emitted
        verbatim as Python source.
    -->
    <configfiles>
        <configfile name="average_script"><![CDATA[
import pandas as pd

## Dataset paths and the zero-based score column are resolved at template time.
## The column is chosen from the datatype of the first selected dataset only.
#set files = [str(f) for f in $input_files]
#set column = 3 if $input_files[0].ext in ['bed', 'bedgraph'] else 5
d = {}
## One Series of scores per input file, keyed by the file path.
## Blank lines are kept as NaN rows so that row k of every Series is physical
## line (k + skiprows) of its file, matching the enumerate() loop further down.
#for f in $files:
d['$f'] = pd.read_csv('$f', usecols=[$column], sep="\t", skiprows=$skiprows, header=None, squeeze=True, skip_blank_lines=False)
#end for
df = pd.DataFrame.from_dict(d)
mean = df.mean(axis=1)
## Copy the first file line by line, replacing only the score field.
## Skipped leading rows and lines too short to hold a score column
## (for example a blank line) are written back without a score substitution.
with open('$averaged_output', 'w') as out, open('$files[0]') as first_file:
    for i, line in enumerate(first_file):
        fields = line.strip().split("\t")
        if i >= $skiprows and len(fields) > $column:
            fields[$column] = str(mean[i - $skiprows])
        out.write("%s\n" % "\t".join(fields))
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="input_files"
               type="data"
               multiple="true"
               format="bed,bedgraph,gff"
               label="Select the files for which to average the score"/>
        <param name="skiprows"
               type="integer"
               min="0"
               value="0"
               label="Skip the first N rows"
               help="To skip comments and track definition lines"/>
    </inputs>
    <outputs>
        <data name="averaged_output"
              format_source="input_files"
              label="${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Averages two BED files with the default skiprows of 0. -->
        <test>
            <param name="input_files" value="1.bed,2.bed" ftype="bed"/>
            <output name="averaged_output" value="averaged.bed" ftype="bed"/>
        </test>
    </tests>
    <help><![CDATA[
What it does
------------

This tool calculates the average value for the score column across many datasets.

The score is read from the 4th column when the first selected dataset is BED or
bedGraph, and from the 6th column otherwise (GFF). Rows are matched by their
position in the file, so all selected datasets should describe the same
intervals in the same order.

The output is a copy of the first selected dataset, including any skipped
leading rows, in which only the score column is replaced by the average.

    ]]></help>
</tool>
author	mvdbeek
date	Fri, 27 Apr 2018 14:54:05 -0400
parents
children	7fd65542efc2