changeset 0:6c38443b46ac draft

planemo upload
author jowong
date Thu, 22 Nov 2018 09:47:44 -0500
parents
children 8217df2fd8c5
files kwip.xml kwip_postprocess.py kwip_postprocess.xml tool_dependencies.xml
diffstat 4 files changed, 129 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kwip.xml	Thu Nov 22 09:47:44 2018 -0500
@@ -0,0 +1,61 @@
+<tool id="kwip" name="kwip" version="1.2.9">
+    <description>Calculates k-mer weighted inner product, a de novo estimator of genetic similarity</description>
+    <requirements>
+        <requirement type="package" version="0.2.0">kwip</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Stage every collection element under its element identifier with a .ct.gz suffix.
+        #for $input in $inputs# cp '$input' '${input.element_identifier}.ct.gz' && #end for#
+        kwip -t $advanced.thread $advanced.unweighted
+        -k kwip.kernel -d kwip.dist
+        #for $input in $inputs# '${input.element_identifier}.ct.gz' #end for#
+        ## The staged copies are removed only when kwip exits successfully (&& chain).
+        #for $input in $inputs# && rm '${input.element_identifier}.ct.gz' #end for#
+    ]]></command>
+    <inputs>
+        <param name="inputs" format="data" type="data_collection" label="inputs" help="Specify dataset with hashed reads"/>
+        <section name="advanced" title="Advanced options" expanded="false">
+            <!-- The boolean has no explicit name; Galaxy derives "unweighted" from its argument attribute. -->
+            <param type="boolean" argument="--unweighted" label="Unweighted Inner Product" checked="false" truevalue="--unweighted" falsevalue="" help="Use the unweighted inner product kernel. (default: False)"/>
+            <param name="thread" type="integer" value="1" label="Threads" help="Number of simultaneous threads to execute (default: 1)" />
+        </section>
+    </inputs>
+    <outputs> <!-- kwip is told to write both matrices to these fixed file names in the working directory -->
+        <data name="kwip_kernel" label="kWip kernel" format="txt" from_work_dir="kwip.kernel"/>
+        <data name="kwip_distance" label="kWip distance" format="txt" from_work_dir="kwip.dist"/>
+    </outputs>
+    <tests> <!-- TODO: no functional tests are defined for this tool yet -->
+    </tests>
+    <help><![CDATA[
+        USAGE: kwip [options] hashes
+
+	kWip Options.
+
+	optional arguments:
+          -t, --threads       Number of threads to utilise. [default N_CPUS]
+          -k, --kernel        Output file for the kernel matrix. [default None]
+          -d, --distance      Output file for the distance matrix. [default stdout]
+          -U, --unweighted    Use the unweighted inner product kernel. [default off]
+          -w, --weights       Bin weight vector file (input, or output w/ -C).
+          -C, --calc-weights  Calculate only the bin weight vector, not kernel matrix.
+          -h, --help          Print this help message.
+          -V, --version       Print the version string.
+          -v, --verbose       Increase verbosity. May or may not actually do anything.
+          -q, --quiet         Execute silently but for errors.
+
+          Each sample's oxli Countgraph should be specified after arguments:
+          kwip [options] sample1.ct sample2.ct ... sampleN.ct
+
+    ]]></help>
+    <citations> <!-- repository path matches the kdmurray91/kWIP release URL used in tool_dependencies.xml -->
+        <citation type="bibtex">
+@misc{githubkWIP,
+  author = {Murray, Kevin},
+  year = {2015},
+  title = {kWIP},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/kdmurray91/kWIP},
+}</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kwip_postprocess.py	Thu Nov 22 09:47:44 2018 -0500
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import sys
+import argparse as ap
+import re
+parser = ap.ArgumentParser(prog='kwip_postprocess', conflict_handler='resolve',
+                           description="Postprocess galaxy kWIP output")
+# nargs=1 keeps args.input a one-element list, hence args.input[0] below.
+input_group = parser.add_argument_group('Input', '')
+input_group.add_argument('-i', '--input', nargs=1, required=True, help="kWIP galaxy OUTPUT")
+
+# sys.argv always contains the script name, so "no arguments" means length 1.
+if len(sys.argv) == 1:
+    parser.print_usage()
+    sys.exit(1)
+
+args = parser.parse_args()
+
+# Read-file suffixes to strip: optional _1/_2, a literal ".fastq", then any ".gz".
+suffix_re = re.compile(r'(_[12])?\.fastq(\.gz)*')
+with open(args.input[0]) as kwip_output, open('kwip_postprocess_output.txt', 'w') as output:
+    for line in kwip_output:
+        # Dots are escaped so names that merely contain "fastq" are left intact.
+        output.write(suffix_re.sub('', line))
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kwip_postprocess.xml	Thu Nov 22 09:47:44 2018 -0500
@@ -0,0 +1,17 @@
+<tool id="kwip_postprocess" name="kWIP Postprocess" version="1.0.0">
+    <description>Postprocess kWIP galaxy output</description>
+    <command><![CDATA[
+        python '$__tool_directory__/kwip_postprocess.py' -i '$kwip_output'
+    ]]></command>
+    <inputs>
+        <param name="kwip_output" type="data" format="txt" label="kWIP output"  />
+    </inputs>
+    <outputs> <!-- kwip_postprocess.py always writes to this fixed file name in the working directory -->
+        <data name="output" format="txt" from_work_dir="kwip_postprocess_output.txt"/>
+    </outputs>
+    <help>
+This tool removes paired-read and FASTQ file suffixes (such as .fastq.gz) from each line of the kWIP Galaxy output so that it is in line with the command-line output.
+    </help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Nov 22 09:47:44 2018 -0500
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="kwip" version="0.2.0">
+        <install version="1.0">
+            <actions_group>
+                <actions architecture="x86_64" os="linux"> <!-- these actions are declared for 64-bit Linux hosts only -->
+                    <action type="download_by_url">https://github.com/kdmurray91/kWIP/releases/download/0.2.0/kwip-binaries_0.2.0.tar.gz</action>
+                    <action type="move_directory_files">
+                        <source_directory>.</source_directory>
+                        <destination_directory>$INSTALL_DIR</destination_directory>
+                    </action> <!-- the whole downloaded tree is moved into the install directory -->
+                </actions>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action> <!-- puts the installed bin directory at the front of PATH -->
+            </actions_group>
+        </install>
+        <readme>
+<![CDATA[
+kWIP works by decomposing sequencing reads to short k-mers, hashing these k-mers and performing pairwise distance calculation between these sample k-mer hashes. We use khmer from the DIB lab, UC Davis to hash sequencing reads. kWIP calculates the distance between samples in a computationally efficient manner, and generates a distance matrix which may be used by downstream tools. The power of kWIP comes from the weighting applied across different hash values, which decreases the effect of erroneous, rare or over-abundant k-mers while focusing on k-mers which give the most insight into the similarity of samples.
+]]>
+        </readme>
+    </package>
+</tool_dependency>