Mercurial > repos > jjohnson > fastq_mcf
changeset 0:217aedbdd0d0
Uploaded
author | jjohnson |
---|---|
date | Tue, 13 Mar 2012 14:44:46 -0400 |
parents | |
children | b61f1466ce8f |
files | fastq-mcf.xml |
diffstat | 1 files changed, 204 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq-mcf.xml Tue Mar 13 14:44:46 2012 -0400
@@ -0,0 +1,201 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Galaxy tool wrapper for fastq-mcf (ea-utils): clips adapters and trims
+  low-quality or skewed read ends from single or paired FASTQ files.
+-->
+<tool id="fastq_mcf" name="FastqMcf" version="1.0">
+  <description>sequence quality filtering and clipping</description>
+  <requirements>
+    <requirement type="binary">fastq-mcf</requirement>
+  </requirements>
+  <version_string>fastq-mcf -V</version_string>
+  <!--
+    Command template (Cheetah). Each optional trimming value is passed only
+    when the user supplied one, so fastq-mcf keeps its own default otherwise.
+    The subsampling read count is passed with -C (the -s flag is the log
+    scale), and the phred-scale input is passed with -P when it is set.
+    With "noclip" checked only -n is passed and no -o outputs are requested;
+    everything fastq-mcf writes to stdout is captured in the log dataset.
+  -->
+  <command>fastq-mcf
+    #if $trimming.choice == 'disable':
+      -0
+    #elif $trimming.choice == 'user_set':
+      #if len($trimming.scale.__str__) > 0:
+        -s $trimming.scale
+      #end if
+      #if len($trimming.minpct.__str__) > 0:
+        -t $trimming.minpct
+      #end if
+      #if len($trimming.nmin.__str__) > 0:
+        -m $trimming.nmin
+      #end if
+      #if len($trimming.pctdiff.__str__) > 0:
+        -p $trimming.pctdiff
+      #end if
+      #if len($trimming.nmax.__str__) > 0:
+        -L $trimming.nmax
+      #end if
+      #if len($trimming.nkeep.__str__) > 0:
+        -l $trimming.nkeep
+      #end if
+      #if len($trimming.skewpct.__str__) > 0:
+        -k $trimming.skewpct
+      #end if
+      #if len($trimming.qthr.__str__) > 0:
+        -q $trimming.qthr
+      #end if
+      #if len($trimming.qwin.__str__) > 0:
+        -w $trimming.qwin
+      #end if
+      #if len($trimming.pctns.__str__) > 0:
+        -x $trimming.pctns
+      #end if
+      #if len($trimming.sampcnt.__str__) > 0:
+        -C $trimming.sampcnt
+      #end if
+      $trimming.ilv3
+      $trimming.rmns
+    #end if
+    #if len($phred.__str__) > 0:
+      -P $phred
+    #end if
+    #if $noclip == True:
+      $noclip
+    #else:
+      -o $reads_out
+      #if $mates.__str__ != 'None':
+        -o $mates_out
+      #end if
+    #end if
+    $adpaters
+    $reads
+    #if $mates.__str__ != 'None':
+      $mates
+    #end if
+    > $log
+  </command>
+  <inputs>
+    <!-- The name "adpaters" (sic) is kept as-is; the command template refers to it. -->
+    <param name="adpaters" type="data" format="fasta" label="A fasta formatted adapter list" />
+    <param name="reads" type="data" format="fastqsanger,fastqillumina" label="Reads: single or Left-hand of Paired End Reads" />
+    <param name="mates" type="data" format="fastqsanger,fastqillumina" optional="true" label="Right-hand mates for Paired End Reads" />
+    <!--
+      fastq-mcf options, for reference:
+      -s N.N  Log scale for clip pct to threshold (2.5)
+      -t N    % occurrence threshold before clipping (0.25)
+      -m N    Minimum clip length, overrides scaled auto (1)
+      -p N    Maximum adapter difference percentage (20)
+      -l N    Minimum remaining sequence length (15)
+      -L N    Maximum sequence length (none)
+      -k N    sKew percentage causing trimming (2)
+      -q N    quality threshold causing trimming (10)
+      -f      force output, even if not much will be done
+      -0      Set all trimming parameters to zero
+      -U|u    Force disable/enable illumina PF filtering
+      -P N    phred-scale (64)
+      -x N    'N' (Bad read) percentage causing trimming (10)
+      -R      Don't remove N's from the fronts/ends of reads
+      -n      Don't clip, just output what would be done
+      -C N    Number of reads to use for subsampling (200000)
+      -d      Output lots of random debugging stuff
+    -->
+    <conditional name="trimming">
+      <param name="choice" type="select" label="Trimming Options">
+        <option value="defaults">Use Defaults</option>
+        <option value="user_set">Set Values</option>
+        <option value="disable">Set all trimming parameters to zero</option>
+      </param>
+      <when value="defaults"/>
+      <when value="disable"/>
+      <when value="user_set">
+        <param name="sampcnt" type="integer" optional="true" label="-C Number of reads to use for subsampling (100000)"/>
+        <param name="scale" type="float" optional="true" label="-s N.N Log scale for clip pct to threshold (2.5)"/>
+        <param name="minpct" type="float" optional="true" label="-t % occurance threshold before clipping (0.25)"/>
+        <param name="nmin" type="integer" optional="true" label="-m Minimum clip length, overrides scaled auto (1)"/>
+        <param name="pctdiff" type="integer" optional="true" label="-p Maximum adapter difference percentage (20)"/>
+        <param name="nmax" type="integer" optional="true" label="-L Maximum sequence length (none)"/>
+        <param name="nkeep" type="integer" optional="true" label="-l Minimum remaining sequence length (15)"/>
+        <param name="skewpct" type="float" optional="true" label="-k sKew percentage causing trimming (2)"/>
+        <param name="qthr" type="integer" optional="true" label="-q quality threshold causing trimming (7)"
+               help="remove end of-read with quality &lt; threshold"/>
+        <param name="qwin" type="integer" optional="true" label="-w mean quality threshold causing trimming (1)"
+               help="remove end of read with mean quality &lt; threshold"/>
+        <param name="pctns" type="float" optional="true" label="-x 'N' (Bad read) percentage causing trimming (10)"/>
+        <param name="rmns" type="boolean" truevalue="-R" falsevalue="" checked="false" label="-R Don't remove N's from the fronts/ends of reads"/>
+        <param name="ilv3" type="select" label="illumina PF filtering">
+          <option value=" ">Default</option>
+          <option value="-U">Disable illumina PF filtering</option>
+          <option value="-u">Enable illumina PF filtering</option>
+        </param>
+      </when>
+    </conditional>
+    <param name="phred" type="integer" optional="true" label="-P phred-scale (64)" help="Default is to determine automatically"/>
+    <param name="noclip" type="boolean" truevalue="-n" falsevalue="" checked="false" label="-n Don't clip, just output what would be done"/>
+  </inputs>
+  <outputs>
+    <!-- The filters skip reads_out/mates_out when "noclip" is set, and mates_out when no mates are given. -->
+    <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
+    <data name="reads_out" format_source="reads" label="${tool.name} on ${on_string}: reads">
+      <filter>noclip == False</filter>
+    </data>
+    <data name="mates_out" format_source="mates" label="${tool.name} on ${on_string}: mates">
+      <filter>(noclip == False and mates != None)</filter>
+    </data>
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+**What it does**
+
+fastq-mcf_ attempts to:
+
+  Detect and remove sequencing adapters and primers
+  Detect limited skewing at the ends of reads and clip
+  Detect poor quality at the ends of reads and clip
+  Detect N's, and remove from ends
+  Remove reads with CASAVA 'Y' flag (purity filtering)
+  Discard sequences that are too short after all of the above
+  Keep multiple mate-reads in sync while doing all of the above
+
+.. _fastq-mcf: http://code.google.com/p/ea-utils/wiki/FastqMcf
+
+-----
+
+**Input**
+
+Fasta file of adapter sequences, for example::
+
+    > Genomic_DNA_oligonucleotide_sequences_Adapters_F
+    GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
+    > Genomic_DNA_oligonucleotide_sequences_Adapters_R
+    ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+    > Genomic_DNA_Sequencing_Primer
+    ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+
+Reads or Left-hand mates, for example::
+
+    @1539:931/1
+    ACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG
+    +1539:931/1
+    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+Right-hand mates, for example::
+
+    @1539:931/2
+    CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT
+    +1539:931/2
+    WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+-----
+
+**Output**
+
+A log file
+
+A trimmed fastq of the reads
+
+A trimmed fastq of the mates
+
+  </help>
+</tool>