Mercurial > repos > bgruening > replace_column_by_key_value_file
diff replaceColumn.xml @ 0:cc18bac5afdb draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 045006e0b2fe5b4fe96583949b0c757eb6a734a4
author | bgruening |
---|---|
date | Fri, 24 Feb 2017 10:14:15 -0500 |
parents | |
children | d533e4b75800 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replaceColumn.xml Fri Feb 24 10:14:15 2017 -0500 @@ -0,0 +1,107 @@ +<tool id="replace_column_with_key_value_file" name="Replace column" version="0.1"> + <description>by values which are defined in a convert file</description> + <command> + <![CDATA[ + python '$replaceColumnScript' + ]]> + </command> + <configfiles> + <configfile name="replaceColumnScript"> + <![CDATA[ +import sys + +replace_file = '$replace_information' +original_file = '$original_file' +column = int("$column_replace") - 1 +ignore_start_lines = int("$skip_lines") +delimiter_local = "\t" if str("$delimiter") == "" else str("$delimiter") + +## read conversion information to index +conversion = {} + +with open(replace_file, 'r') as conversion_file: + for line in conversion_file: + conv_key_value = line.strip().split() + if len(conv_key_value) == 2: + conversion[conv_key_value[0]] = conv_key_value[1] + +## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped. +with open("output_file", 'w') as output: + with open(original_file) as original: + for i, line in enumerate(original): + if i < ignore_start_lines: + output.write(line) + continue + + if str("$delimiter") == "": + line_content = line.split() + else: + line_content = line.split(str("$delimiter")) + + out = list() + for j, line_content_column in enumerate(line_content): + if j == column: + if line_content_column in conversion: + out.append(conversion[line_content_column]) + else: + out.append(line_content_column) + + if len(out) == len(line_content): + output.write('%s\n' % delimiter_local.join(out)) + +]]> + </configfile> + </configfiles> + <inputs> + <param name="original_file" type="data" format="tabular" + label="File in which you want to replace some values" + help="The entries of a specific column are replaced by the information given by the next input file." /> + <param name="replace_information" type="data" format="tabular" + label="Replace information file" + help="This file contains in the first column the entries that should be replaced by the values of the second column." /> + <param name="column_replace" type="data_column" data_ref="original_file" multiple="false" + label="Which column should be replaced?" /> + <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" /> + <param name="delimiter" type="select" label="Delimited by"> + <option value="" selected="True">Tab</option> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> + </param> + </inputs> + <outputs> + <data name="outfile_replace" format="txt" from_work_dir="output_file"/> + </outputs> + <tests> + <test> + <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> + <param name="original_file" value="original_file" ftype="tabular" /> + <param name="column_replace" value="1"/> + <param name="skip_lines" value="1"/> + <param name="delimiter" value="" /> + <output name="outfile_replace" file="result_file"/> + </test> + <test> + <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> + <param name="original_file" value="empty_mapping" ftype="tabular" /> + <param name="column_replace" value="1"/> + <param name="skip_lines" value="1"/> + <param name="delimiter" value="" /> + <output name="outfile_replace" file="result_file_empty_mapping"/> + </test> + </tests> + <help> + <![CDATA[ +**What it does** + +This tool replaces the entries of a defined column with entries given by a replacement file. +For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the frist column and in the second the UCSC annotation. +A file which is having information about chromosomes in ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation. + +A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings + ]]> + </help> +</tool>