changeset 3:f3128f4ffe34 draft default tip

Deleted selected files
author czlab
date Thu, 17 May 2018 22:39:45 -0400
parents 621da360a155
children
files fastq2collapse.xml fastqFilter.xml trimming3.xml
diffstat 3 files changed, 0 insertions(+), 192 deletions(-) [+]
line wrap: on
line diff
--- a/fastq2collapse.xml	Thu May 17 21:33:10 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-<tool id="fastq2collapse" name="Collapse exact PCR duplicates">
-	<description>in FASTQ</description>
-	<!-- Formerly <command interpreter="perl">. Dataset paths are single-quoted so the shell passes each one as exactly one argument. -->
-	<command>
-		fastq2collapse.pl -v '$input' '$output'
-	</command>
-	<inputs>
-		<param type="data" format="fastq" name="input" label="Input FASTQ file (.gz file accepted)"/>
-	</inputs>
-
-	<outputs>
-		<data name="output" format="fastq" label="Collapse exact PCR duplicates on ${on_string}" />
-	</outputs>
-
-	<help>
-
-.. class:: infomark
-
-**What this tool does**
-
-This tool collapses exact duplicate sequences.
-
-It takes as input FASTQ files of filtered and trimmed reads and outputs FASTQ files in which exact PCR duplicates have been collapsed.
-
-
-
-
-	</help>
-</tool>
-
--- a/fastqFilter.xml	Thu May 17 21:33:10 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-<tool id="fastqFilter" name="Filter FASTQ files">
-  	<description></description>
-  	<command>
-		fastq_filter.pl -v
-			#if $sampleIndex.filterBySampleIndex == "yes":
-				-index '$sampleIndex.sequence'
-			#end if
-			-maxN $maxN -if sanger -f '$filterString' -of $outputFormat '$inputfile' '$outputfile'
-  	</command> <!-- Free-text values and dataset paths are single-quoted so each one reaches fastq_filter.pl as exactly one argument. -->
-
-  	<inputs>
-        <param name="inputfile" format="fastq"  type="data" label="Input Sanger FASTQ file (.gz file accepted; see help below for more information)" />
-
-	<conditional name="sampleIndex">
-		<param name="filterBySampleIndex" type="select" label="Filter by sample index (see help below for parameter suggestion)" >
-		<option value="yes">Yes</option>
-		<option value="no" selected="true">No</option>
-		</param>
-		<when value="yes">
-			<param name="sequence" type="text" value="" label="Index position and sequence" />
-		</when>
-		<when value="no">
-		</when>
-	</conditional>	
-
-    	<param name="filterString"  type="text" value="" label="Quality score filter string; format: Method:Start-End:Score (zero-based; see help below for parameter suggestion)" />
-	<param name="maxN" type="integer" value="-1" label="Max number of N in sequence (default off - value less than 0) " />
-	<param name="outputFormat" type="select" label="Output data type">
-		<option value="fastq">FASTQ</option>
-		<option value="fasta">FASTA</option>
-	</param>
-
-  	</inputs>
-
-	<outputs>
-	<data name="outputfile" format="fastq" label="Read quality filtering on ${on_string}">
-		<change_format>
-			<when input="outputFormat" value="fasta" format="fasta" /> <!-- "input" must match the name of the outputFormat select param exactly (case-sensitive) -->
-		</change_format>
-	</data>	
-	</outputs>
-
-	<help>
-
-.. class:: infomark
-
-**What this tool does**
-
-This tool extracts reads passing quality filters.
-
-It takes Sanger FASTQ files as input and outputs FASTQ or FASTA files of the filtered reads.
-
------
-
-**FASTQ format**
-
-Check quality score in the FASTQ file for the right format.
-
-Reference https://en.wikipedia.org/wiki/FASTQ_format#Quality :
-
-* Sanger format can encode a Phred quality score from 0 to 93 using ASCII 33 to 126. 
-* Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126.
-
-See http://www.asciitable.com/ for ASCII table.
-
------
-
-**Filter by sample index (optional)**
-
-This option is for users who would like to start from a FASTQ file that contains multiple libraries.
-
-For example:
-
-If you have six samples with indexes GTCA, GCAT, ACTG, AGCT, GCAT, TCGA, you can extract reads for each library with indicated index sequences (e.g. GTCA, etc.) starting from position 0 in the read. For example, you could specify 0:GTCA, etc.
-
------
-
-**How to set the filter**
-
-You can apply multiple filtering criteria based on the quality scores for each read. They are separated by commas.
-
-Each criterion is composed of four components (e.g. method1:start1-end1:score1,method2:start2-end2:score2):
-
-1. Method: min or mean, which means requirement on minimal or mean score of a region 
-2. Start:  the first nucleotide to consider (0-based)
-3. End:    the last nucleotide to consider (0-based)
-4. Score:  the threshold required
-
-**Parameter suggestion**
-
-For example:
-
-* For Standard CLIP protocol filtering: mean:0-29:20 (this specifies a mean score of 20 or above in the first 30 bases, which includes 5 positions with sample indexes and the random barcode, followed by 25 positions with the actual CLIP tag).
-* For iCLIP/BrdU CLIP filtering: mean:0-38:20 (this specifies a mean score of 20 or above in the first 39 bases, which includes 14 positions with sample indexes and the random barcode, followed by 25 positions with the actual CLIP tag). 
-
-Filtering in this way matters because low-quality reads can introduce mapping errors and background, and they inflate the number of unique tags that remain after removal of PCR duplicates.
-
-
-
-	</help>
-</tool>
--- a/trimming3.xml	Thu May 17 21:33:10 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="trimming3" name="Trim 3' adapter">
-  <description> using FASTX Toolkit</description>
-  
-  <command>
-	fastx_clipper -a '$adapterSeq' -l $discardShorterThan $discardNonclipped $discardClipped $adapterOnly $keepUnknown   
-	#if $minAdapterAlignment.minOverlapRequired =="yes":
-		-M $minAdapterAlignment.minLen
-	#end if	
-	-v -i '$input' 2>/dev/null | fastq_quality_trimmer -v -l $discardShorterThan -t $qualityThreshold -o '$output'
-  </command> <!-- The free-text adapter sequence and the dataset paths are single-quoted so each one is passed as exactly one shell argument. -->
-
-  <inputs>
-	<param name="input" type="data" format="fastq" label="Input FASTQ file"/>
-    
-	<param name="adapterSeq" type="text" value="" label="Adapter sequence (the 3' adapter will vary for different CLIP protocol variations)"/>
-        <param name="discardShorterThan" type="integer" value="" label="Discard sequences shorter than N nucleotides (see help below for parameter suggestion)"/>
-	<param name="discardNonclipped" type="boolean" truevalue="-c" falsevalue="" checked="no" label="Discard non-trimmed sequences (i.e. - keep only sequences which contained the adapter)" />
-        <param name="discardClipped" type="boolean" truevalue="-C" falsevalue="" checked="no" label="Discard trimmed sequences (i.e. - keep only sequences which did not contain the adapter)" />
-        <param name="adapterOnly" type="boolean" truevalue="-k" falsevalue="" checked="no" label="Report Adapter-Only sequences"/>
-        <param name="keepUnknown" type="boolean" truevalue="-n" falsevalue="" checked="yes"  label="Keep sequences with unknown nucleotides"/>
-	<conditional name="minAdapterAlignment">
-		<param name="minOverlapRequired" type="select" label="Require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't trim it.">
-			<option value="yes">Yes</option>
-			<option value="no" selected="True">No</option>
-		</param>
-		<when value="yes">
-			<param name="minLen" type="integer" value="" label="Input the length"/>
-		</when>
-		<when value="no">
-		</when>
-	</conditional>
-        <param name="qualityThreshold" type="integer" value="5" label="Quality threshold - nucleotides with lower quality will be trimmed (from the end of the sequence)"/>
-        <!--<param name="CompressOutput" type="boolean" truevalue="-z" falsevalue="" checked="no" label="Compress output with GZIP"/> -->
- 
-  </inputs>
-  
-  <outputs>
-	<data name="output" format="fastq" label="Trim 3' adapter on ${on_string} "/>
-  </outputs>
-  <help>
-
-.. class:: infomark
-
-**What this tool does**
-
-
-This tool takes FASTQ files as input and outputs FASTQ files with 3' adapters and extremely low quality bases (e.g. score less than 5) removed.
-
-It is a wrapper around fastx_clipper and fastq_quality_trimmer, which are part of the FASTX Toolkit (http://hannonlab.cshl.edu/fastx_toolkit/).
-
------
-
-**Parameter suggestion for discarding sequences**
-
-We typically require a high quality score in the barcode and in 15 nt of the CLIP tag.
-
-* For standard CLIP: discard sequences shorter than 20 nt (5 nt barcode + 15 nt CLIP tag).
-* For BrdU CLIP: discard sequences shorter than 29 nucleotides (14 nt barcode + 15 nt CLIP tag).
-  </help>
-
-</tool>