Mercurial > repos > bgruening > replace_column_by_key_value_file

diff replaceColumn.xml @ 0:cc18bac5afdb draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 045006e0b2fe5b4fe96583949b0c757eb6a734a4
author: bgruening
date: Fri, 24 Feb 2017 10:14:15 -0500
children: d533e4b75800
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/replaceColumn.xml	Fri Feb 24 10:14:15 2017 -0500
@@ -0,0 +1,107 @@
+<tool id="replace_column_with_key_value_file" name="Replace column" version="0.1">
+    <description>by values which are defined in a convert file</description>
+    <command>
+        <![CDATA[
+        python '$replaceColumnScript'
+        ]]>
+    </command>
+    <configfiles>
+        <configfile name="replaceColumnScript">
+            <![CDATA[
+import sys
+
+replace_file = '$replace_information'
+original_file = '$original_file'
+column = int("$column_replace") - 1
+ignore_start_lines = int("$skip_lines")
+delimiter_local = "\t" if str("$delimiter") == "" else str("$delimiter")
+
+## read conversion information to index 
+conversion = {}
+
+with open(replace_file, 'r') as conversion_file:
+    for line in conversion_file:
+        conv_key_value = line.strip().split()
+        if len(conv_key_value) == 2:
+            conversion[conv_key_value[0]] = conv_key_value[1]                
+
+## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped.
+with open("output_file", 'w') as output:
+    with open(original_file) as original:
+        for i, line in enumerate(original):
+            if i < ignore_start_lines:
+                output.write(line)
+                continue
+
+            if str("$delimiter") == "":
+                line_content = line.split()
+            else:
+                line_content = line.split(str("$delimiter"))
+
+            out = list()
+            for j, line_content_column in enumerate(line_content):
+                if j == column:
+                    if line_content_column in conversion:
+                        out.append(conversion[line_content_column])
+                else:
+                    out.append(line_content_column)
+
+            if len(out) == len(line_content):
+                output.write('%s\n' % delimiter_local.join(out))
+
+]]>
+        </configfile>
+    </configfiles>
+    <inputs> <!-- each param name below is referenced as a placeholder by the replaceColumnScript configfile -->
+        <param name="original_file" type="data" format="tabular"
+               label="File in which you want to replace some values"
+               help="The entries of a specific column are replaced by the information given by the next input file." />
+        <param name="replace_information" type="data" format="tabular"
+               label="Replace information file"
+               help="This file contains in the first column the entries that should be replaced by the values of the second column." />
+        <param name="column_replace" type="data_column" data_ref="original_file" multiple="false"
+               label="Which column should be replaced?" help="Lines whose entry in this column is not listed in the replace information file are left out of the output." />
+        <param name="skip_lines" type="integer" value="0" min="0" label="Skip this many starting lines" help="These leading lines are copied to the output unchanged." />
+        <param name="delimiter" type="select" label="Delimited by" help="Tab splits each line on any run of whitespace and writes tab-separated output; every other choice splits on exactly that single character.">
+            <option value="" selected="True">Tab</option>
+            <option value=" ">Whitespace</option>
+            <option value=".">Dot</option>
+            <option value=",">Comma</option>
+            <option value="-">Dash</option>
+            <option value="_">Underscore</option>
+            <option value="|">Pipe</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data  name="outfile_replace" format="txt" from_work_dir="output_file"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
+            <param name="original_file" value="original_file" ftype="tabular" />
+            <param name="column_replace" value="1"/>
+            <param name="skip_lines" value="1"/>
+            <param name="delimiter" value="" />
+            <output name="outfile_replace" file="result_file"/>
+        </test>
+        <test>
+            <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" />
+            <param name="original_file" value="empty_mapping" ftype="tabular" />
+            <param name="column_replace" value="1"/>
+            <param name="skip_lines" value="1"/>
+            <param name="delimiter" value="" />
+            <output name="outfile_replace" file="result_file_empty_mapping"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+
+This tool replaces the entries of a chosen column with the values given in a replacement file.
+For example, the replacement file may hold the Ensembl chromosome names in the first column and the corresponding UCSC names in the second column.
+A file that contains chromosome names in Ensembl notation in column x can then be converted to a file that holds the same information in UCSC notation.
+
+A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings
+        ]]>
+    </help>
+</tool>
author	bgruening
date	Fri, 24 Feb 2017 10:14:15 -0500
parents
children	d533e4b75800