changeset 2:621da360a155 draft

Uploaded
author czlab
date Thu, 17 May 2018 21:33:10 -0400
parents 64429d5e9365
children f3128f4ffe34
files trimming3.xml
diffstat 1 files changed, 61 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trimming3.xml	Thu May 17 21:33:10 2018 -0400
@@ -0,0 +1,61 @@
+<tool id="trimming3" name="Trim 3' adapter">
+  <description> using FASTX Toolkit</description>
+  <!-- Clips the 3' adapter with fastx_clipper and pipes the result into fastq_quality_trimmer, which writes the output file. User-controlled values are single-quoted for the shell; -M is passed only when a minimum overlap was requested; stderr of fastx_clipper is discarded. -->
+  <command><![CDATA[
+    fastx_clipper -a '$adapterSeq' -l $discardShorterThan $discardNonclipped $discardClipped $adapterOnly $keepUnknown
+    #if $minAdapterAlignment.minOverlapRequired == "yes":
+        -M $minAdapterAlignment.minLen
+    #end if
+    -v -i '$input' 2>/dev/null | fastq_quality_trimmer -v -l $discardShorterThan -t $qualityThreshold -o '$output'
+  ]]></command>
+
+  <inputs>
+    <!-- Form parameters substituted into the command template. Boolean params expand to their fastx_clipper flag (or nothing); adapterSeq must be non-empty letters so that -a always receives an argument; length params may not be negative. -->
+    <param name="input" type="data" format="fastq" label="Input FASTQ file"/>
+    <param name="adapterSeq" type="text" value="" label="Adapter sequence (the 3' adapter will vary for different CLIP protocol variations)">
+      <validator type="regex" message="Adapter sequence must be a non-empty string of letters (nucleotide codes)">^[A-Za-z]+$</validator>
+    </param>
+    <param name="discardShorterThan" type="integer" value="" min="0" label="Discard sequences shorter than N nucleotides (see help below for parameter suggestion)"/>
+    <param name="discardNonclipped" type="boolean" truevalue="-c" falsevalue="" checked="no" label="Discard non-trimmed sequences (i.e. - keep only sequences which contained the adapter)" />
+    <param name="discardClipped" type="boolean" truevalue="-C" falsevalue="" checked="no" label="Discard trimmed sequences (i.e. - keep only sequences which did not contain the adapter)" />
+    <param name="adapterOnly" type="boolean" truevalue="-k" falsevalue="" checked="no" label="Report Adapter-Only sequences"/>
+    <param name="keepUnknown" type="boolean" truevalue="-n" falsevalue="" checked="yes"  label="Keep sequences with unknown nucleotides"/>
+    <conditional name="minAdapterAlignment">
+      <param name="minOverlapRequired" type="select" label="Require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't trim it.">
+        <option value="yes">Yes</option>
+        <option value="no" selected="True">No</option>
+      </param>
+      <when value="yes">
+        <param name="minLen" type="integer" value="" min="1" label="Input the length"/>
+      </when>
+      <when value="no"/>
+    </conditional>
+    <param name="qualityThreshold" type="integer" value="5" label="Quality threshold - nucleotides with lower quality will be trimmed (from the end of the sequence)"/>
+    <!--<param name="CompressOutput" type="boolean" truevalue="-z" falsevalue="" checked="no" label="Compress output with GZIP"/> -->
+  </inputs>
+  <!-- The tool produces one FASTQ dataset: the file that fastq_quality_trimmer writes through its -o option. -->
+  <outputs>
+    <data format="fastq" name="output" label="Trim 3' adapter on ${on_string} "/>
+  </outputs>
+  <help>
+
+.. class:: infomark
+
+**What this tool does**
+
+
+This tool takes FASTQ files as input and outputs FASTQ files with 3' adapters and extremely low quality bases (e.g. score less than 5) removed.
+
+It is a wrapper around fastx_clipper and fastq_quality_trimmer, which are part of the FASTX Toolkit (http://hannonlab.cshl.edu/fastx_toolkit/).
+
+-----
+
+**Parameter suggestion for discarding sequences**
+
+We typically require a high quality score in the barcode and 15 nt of the CLIP tag.
+
+* For standard CLIP: discard sequences shorter than 20 nt (5 nt barcode + 15 nt CLIP tag).
+* For BrdU CLIP: discard sequences shorter than 29 nt (14 nt barcode + 15 nt CLIP tag).
+  </help>
+
+</tool>