Mercurial > repos > mvdbeek > damidseq_average_scores
changeset 1:7fd65542efc2 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damidseq_average_score commit 14ffe107199084dcb9f4a9f693ef7b6be097a87b
author | mvdbeek |
---|---|
date | Sat, 28 Apr 2018 13:51:18 -0400 |
parents | 427f5dda8854 |
children | |
files | average_scores.xml test-data/1.bed test-data/2.bed test-data/averaged.bed |
diffstat | 4 files changed, 15 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/average_scores.xml Fri Apr 27 14:54:05 2018 -0400
+++ b/average_scores.xml Sat Apr 28 13:51:18 2018 -0400
@@ -11,32 +11,37 @@
 import pandas as pd
 #set files = [str(f) for f in $input_files]
-#set column = 3 if $input_files[0].ext in ['bed', 'bedgraph'] else 5
+#set column = {'bed': 4, 'bedgraph': 3, 'gff': 5, 'gff3': 5, 'gtf': 5}[$input_files[0].ext]
+skiprows = 0
+with open('$files[0]') as first_file:
+    for i, line in enumerate(first_file):
+        if not line.startswith(('track', 'browser', '#')):
+            skiprows = i
+            break
 d = {}
 #for f in $files:
-d['$f'] = pd.read_csv('$f', usecols=[$column], sep="\t", skiprows=$skiprows, header=None, squeeze=True)
+d['$f'] = pd.read_csv('$f', usecols=[$column], sep="\t", skiprows=skiprows, header=None, squeeze=True)
 #end for
 df = pd.DataFrame.from_dict(d)
 mean = df.mean(axis=1)
 with open('$averaged_output', 'w') as out, open('$files[0]') as first_file:
     for i, line in enumerate(first_file):
         fields = line.strip().split("\t")
-        if i >= $skiprows:
-            fields[$column] = str(mean[i - $skiprows])
+        if i >= skiprows:
+            fields[$column] = str(mean[i - skiprows])
         out.write("%s\n" % "\t".join(fields))
         </configfile>
     </configfiles>
     <inputs>
         <param name="input_files" type="data" multiple="true" format="bed,bedgraph,gff" label="Select the files for which to average the score"/>
-        <param name="skiprows" type="integer" min="0" value="0" label="Skip the first N rows" help="To skip comments and track definition lines"/>
     </inputs>
     <outputs>
         <data name="averaged_output" format_source="input_files" label="${tool.name} on ${on_string}"/>
     </outputs>
     <tests>
         <test>
-            <param name="input_files" value="1.bed,2.bed" ftype="bed"/>
-            <output name="averaged_output" value="averaged.bed" ftype="bed"/>
+            <param name="input_files" value="1.bed,2.bed" ftype="bedgraph"/>
+            <output name="averaged_output" value="averaged.bed" ftype="bedgraph"/>
         </test>
     </tests>
     <help><![CDATA[
--- a/test-data/1.bed Fri Apr 27 14:54:05 2018 -0400
+++ b/test-data/1.bed Sat Apr 28 13:51:18 2018 -0400
@@ -1,3 +1,4 @@
+track type=bedGraph name="test"
 X 23522161 23522595 0
 X 23522595 23522617 0
 X 23522617 23522667 0