Mercurial > repos > bgruening > replace_column_by_key_value_file
view replaceColumn.xml @ 1:d533e4b75800 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 0def21576e206a0732ce63bacd18533064ddf155
author | bgruening |
---|---|
date | Sun, 23 Sep 2018 04:03:34 -0400 |
parents | cc18bac5afdb |
children |
line wrap: on
line source
<tool id="replace_column_with_key_value_file" name="Replace column" version="0.2">
    <!--
        Replaces the values of one column of a delimited text file using a
        two-column key/value lookup file. The work is done by a small Python
        script that is rendered from the configfile below and executed by the
        command section.
    -->
    <description>by values which are defined in a convert file</description>
    <!-- Success or failure is decided by the exit status of the script. -->
    <command detect_errors="exit_code">
<![CDATA[
        python '$replaceColumnScript'
]]>
    </command>
    <configfiles>
        <!--
            The script body must start in the first text column because it is
            Python source. Lines starting with a double hash are template
            comments and are not part of the rendered script.
        -->
        <configfile name="replaceColumnScript">
<![CDATA[
import sys

replace_file = '$replace_information'
original_file = '$original_file'
column = int("$column_replace") - 1
ignore_start_lines = int("$skip_lines")
delimiter_local = "\t" if str("$delimiter") == "tab" else str("$delimiter")
comment_str = str("$pass_comments")
unk_strat = str("$unknowns_strategy")

## Build the key to value lookup from the replacement file. Lines that do not
## consist of exactly two whitespace separated fields are ignored.
conversion = {}
with open(replace_file, 'r') as conversion_file:
    for line in conversion_file:
        conv_key_value = line.strip().split()
        if len(conv_key_value) == 2:
            conversion[conv_key_value[0]] = conv_key_value[1]

## Stream the input file and rewrite the selected column.
with open("output_file", 'w') as output:
    with open(original_file) as original:
        for i, line in enumerate(original):
            ## Leading lines to skip and comment lines are copied verbatim.
            if i < ignore_start_lines or (comment_str and line.startswith(comment_str)):
                output.write(line)
                continue
            ## Only the line terminator is removed, so trailing empty columns
            ## are kept instead of being stripped together with the newline.
            line_content = line.rstrip('\r\n').split(delimiter_local)
            ## Lines too short to contain the selected column are written as they are.
            if column < len(line_content):
                value = line_content[column]
                if value in conversion:
                    line_content[column] = conversion[value]
                elif unk_strat == "error":
                    sys.exit('ERROR: Encountered a value [%s] in the file that is not in the replacements file and is not commented with [%s]' % (value, comment_str))
                elif unk_strat != "print":
                    ## Strategy "skip": the whole line is dropped from the output.
                    continue
            output.write('%s\n' % delimiter_local.join(line_content))
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="original_file" type="data" format="tabular"
               label="File in which you want to replace some values"
               help="The entries of a specific column are replaced by the information given by the next input file." />
        <param name="replace_information" type="data" format="tabular"
               label="Replace information file"
               help="This file contains in the first column the entries that should be replaced by the values of the second column." />
        <param name="column_replace" type="data_column" data_ref="original_file" multiple="false"
               label="Which column should be replaced?" />
        <param name="skip_lines" type="integer" value="0"
               label="Skip this many starting lines"
               help="These lines are written to the output unchanged." />
        <param name="delimiter" type="select" label="Delimited by">
            <option value="tab" selected="True">Tab</option>
            <option value=" ">Space</option>
            <option value=".">Dot</option>
            <option value=",">Comma</option>
            <option value="-">Dash</option>
            <option value="_">Underscore</option>
            <option value="|">Pipe</option>
        </param>
        <param name="unknowns_strategy" type="select" label="When an unknown value is encountered">
            <option value="skip" selected="True">Skip / Do not print</option>
            <option value="print">Print without modification</option>
            <option value="error">Exit with an error</option>
        </param>
        <param name="pass_comments" type="text" value="#"
               label="Do not perform replacement on lines starting with">
            <!-- Adds the hash sign to the characters accepted for this parameter. -->
            <sanitizer>
                <valid>
                    <add value="#" />
                </valid>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!-- The script writes its result to "output_file" in the working directory. -->
        <data name="outfile_replace" format="txt" from_work_dir="output_file"/>
    </outputs>
    <tests>
        <!-- Regular replacement, unknown values are skipped. -->
        <test>
            <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
            <param name="original_file" value="original_file" ftype="tabular" />
            <param name="column_replace" value="1"/>
            <param name="skip_lines" value="1"/>
            <param name="delimiter" value="tab" />
            <param name="unknowns_strategy" value="skip"/>
            <param name="pass_comments" value="#"/>
            <output name="outfile_replace" file="result_file"/>
        </test>
        <!-- Same settings with the "empty_mapping" input file. -->
        <test>
            <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
            <param name="original_file" value="empty_mapping" ftype="tabular" />
            <param name="column_replace" value="1"/>
            <param name="skip_lines" value="1"/>
            <param name="delimiter" value="tab" />
            <param name="unknowns_strategy" value="skip"/>
            <param name="pass_comments" value="#"/>
            <output name="outfile_replace" file="result_file_empty_mapping"/>
        </test>
        <!-- Strategy "error": the job is expected to fail. -->
        <test expect_failure="True">
            <param name="replace_information" value="neg_test_map.txt" ftype="tabular" />
            <param name="original_file" value="neg_test_commented.txt" ftype="tabular" />
            <param name="column_replace" value="1"/>
            <param name="skip_lines" value="0"/>
            <param name="delimiter" value="tab" />
            <param name="unknowns_strategy" value="error"/>
            <param name="pass_comments" value="#"/>
        </test>
        <!-- Strategy "print": the output is expected to equal the input file. -->
        <test>
            <param name="replace_information" value="neg_test_map.txt" ftype="tabular" />
            <param name="original_file" value="neg_test_commented.txt" ftype="tabular" />
            <param name="column_replace" value="1"/>
            <param name="skip_lines" value="0"/>
            <param name="delimiter" value="tab" />
            <param name="unknowns_strategy" value="print"/>
            <param name="pass_comments" value="#"/>
            <output name="outfile_replace" file="neg_test_commented.txt"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool replaces the entries of a defined column with entries given by a replacement file.
For example, the replacement file holds the naming scheme of Ensembl annotated chromosomes in the first column and the UCSC annotation in the second.
A file that contains information about chromosomes in Ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation.

A useful repository for Ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings
]]>
    </help>
</tool>