diff fastx_collapser.xml @ 0:78a7d28f2a15 draft

Uploaded
author idot
date Wed, 10 Jul 2013 06:13:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_collapser.xml	Wed Jul 10 06:13:48 2013 -0400
@@ -0,0 +1,92 @@
+<tool id="cshl_fastx_collapser" name="Collapse">
+	<description>sequences</description>
+	<command>
+## Let fastx_collapser open the dataset itself (-i) instead of piping it through cat; -Q is passed only for FASTQ inputs.
+fastx_collapser
+#if $input.ext == "fastqsanger":
+ -Q 33
+#elif $input.ext == "fastq":
+ -Q 64
+#end if
+ -v -i '$input' -o '$output'
+</command>
+
+	<inputs>
+		<param name="input" type="data" format="fastq,fastqsanger,fasta" label="Library to collapse" />
+	</inputs>
+
+	<!-- The output dataset is declared as FASTA regardless of the input format. -->
+	<outputs>
+		<data name="output" format="fasta" metadata_source="input" />
+	</outputs>
+
+	<tests>
+		<test>
+			<param name="input" value="fasta_collapser1.fasta" />
+			<output name="output" file="fasta_collapser1.out" />
+		</test>
+	</tests>
+  <help>
+
+**What it does**
+
+This tool collapses each set of identical sequences in a FASTQ or FASTA file into a single sequence.
+
+--------
+
+**Example**
+
+Example Input File (Sequence "ATAT" appears multiple times):: 
+
+    >CSHL_2_FC0042AGLLOO_1_1_605_414
+    TGCG
+    >CSHL_2_FC0042AGLLOO_1_1_537_759
+    ATAT
+    >CSHL_2_FC0042AGLLOO_1_1_774_520
+    TGGC
+    >CSHL_2_FC0042AGLLOO_1_1_742_502
+    ATAT
+    >CSHL_2_FC0042AGLLOO_1_1_781_514
+    TGAG
+    >CSHL_2_FC0042AGLLOO_1_1_757_487
+    TTCA
+    >CSHL_2_FC0042AGLLOO_1_1_903_769
+    ATAT
+    >CSHL_2_FC0042AGLLOO_1_1_724_499
+    ATAT
+
+Example Output file::
+
+    >1-1
+    TGCG
+    >2-4
+    ATAT
+    >3-1
+    TGGC
+    >4-1
+    TGAG
+    >5-1
+    TTCA
+    
+.. class:: infomark
+
+Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded. 
+
+The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value.
+
+The following output::
+
+    >2-4
+    ATAT
+
+means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input file.
+
+------
+
+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
+
+ .. __: http://hannonlab.cshl.edu/fastx_toolkit/    
+
+</help>
+</tool>
+<!-- FASTX-Collapser is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->