<!-- Mercurial > repos > jmariette > pyrocleaner -->

<tool id="pyrocleaner" name="pyrocleaner" version="1.2">
	<description>454 sequence cleaning</description>
    <!-- External executables and Python modules this tool declares as requirements. -->
    <requirements>
        <!-- command-line programs -->
        <requirement type="binary">megablast</requirement>
        <requirement type="binary">formatdb</requirement>
        <requirement type="binary">cross_match</requirement>
        <requirement type="binary">sfffile</requirement>
        <requirement type="binary">sff_extract.py</requirement>
        <!-- Python modules -->
        <requirement type="python-module">Bio</requirement>
        <requirement type="python-module">igraph</requirement>
    </requirements>
	<command interpreter="python">pyrocleaner_wrapper.py
		## Every enabled cleaning step appends its flags to one string, which is
		## handed to the wrapper through its options argument on the last line.
		#set $cleaner_args = " --in='%s' " % ($input)
		## Paired-end cleaning: the two extra output paths go to the wrapper directly.
		#if $clean_pairends.clean_pairends_select == "y"
			--out-pe-qual="$out_pe_qual"
			--out-pe-fasta="$out_pe_fasta"
			#set $cleaner_args += " --clean-pairends --border-limit='%s' --missmatch='%s'" % ($clean_pairends.border_limit,$clean_pairends.missmatch)
		#end if
		## Length filter based on the standard deviation.
		#if $clean_length_std.clean_length_std_select == "y"
			#set $cleaner_args += " --clean-length-std --std='%s'" % ($clean_length_std.std)
		#end if
		## Length filter based on a fixed window.
		#if $clean_length_win.clean_length_win_select == "y"
			#set $cleaner_args += " --clean-length-win --min='%s' --max='%s'" % ($clean_length_win.min,$clean_length_win.max)
		#end if
		## Filter on the percentage of N.
		#if $clean_ns.clean_ns_select == "y"
			#set $cleaner_args += " --clean-ns --ns_percent='%s'" % ($clean_ns.ns_percent)
		#end if
		## Duplicated reads; the multi-select renders as None when nothing is ticked.
		#if $clean_duplicated_reads.clean_duplicated_reads_select == "y"
			#set $cleaner_args += " --clean-duplicated-reads --duplication_limit='%s'" % ($clean_duplicated_reads.duplication_limit)
			#if $str($clean_duplicated_reads.aggressive) != 'None'
				#set $cleaner_args += " --aggressive"
			#end if
		#end if
		## Complexity computed on a sliding window.
		#if $clean_complexity_win.clean_complexity_win_select == "y"
			#set $cleaner_args += " --clean-complexity-win --complexity='%s' --window='%s' --step='%s'" % ($clean_complexity_win.complexity,$clean_complexity_win.window,$clean_complexity_win.step)
		#end if
		## Complexity computed on the whole sequence.
		#if $clean_complexity_full.clean_complexity_full_select == "y"
			#set $cleaner_args += " --clean-complexity-full --complexity='%s'" % ($clean_complexity_full.complexity)
		#end if
		## Quality threshold filter.
		#if $clean_quality.clean_quality_select == "y"
			#set $cleaner_args += " --clean-quality --quality-threshold='%s'" % ($clean_quality.quality_threshold)
		#end if
		--options="$cleaner_args" --log="$log" --output="$output" --out-dir="$output.extra_files_path" --format="$input.extension"
	</command>
	<inputs>
		<!-- Reads to clean; the command passes this dataset as the in option and its extension as the format argument. -->
		<param name="input" type="data" format="sff,fastq" label="454 reads" />
		<!-- Optional paired-end cleaning. When enabled, the command forwards the border limit and
		     the mismatch limit, and the two paired-end outputs are created. -->
		<conditional name="clean_pairends">
			<param name="clean_pairends_select" type="select" label="Clean paired-ends">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<param name="border_limit" type="integer" size="10" value="70" label="Minimal length between the spacer and the read extremity"/>
				<!-- The parameter name keeps its original spelling because the command template references it; only the label is corrected. -->
				<param name="missmatch" type="integer" size="10" value="10" label="Limit of mismatches"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional length filter expressed as a number of standard deviations around the mean read length. -->
		<conditional name="clean_length_std">
			<param name="clean_length_std_select" type="select" label="Filter reads shorter than mean minus x*standard deviation and reads longer than mean plus x*standard deviation">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<!-- This value is the x used in the label above. -->
				<param name="std" type="integer" size="10" value="2" label="Number of standard deviations"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional length-window filter; min and max are forwarded to the command when enabled. -->
		<conditional name="clean_length_win">
			<param name="clean_length_win_select" type="select" label="Filter reads with a length within specified values">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<param name="min" type="integer" value="200" size="10" label="Minimal length"/>
				<param name="max" type="integer" value="600" size="10" label="Maximal length"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional filter on the percentage of N in a read. -->
		<conditional name="clean_ns">
			<param name="clean_ns_select" type="select" label="Filter reads with too many N">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<param name="ns_percent" type="integer" value="4" size="10" label="Percentage of N to use to filter reads"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional duplicated-read filter. -->
		<conditional name="clean_duplicated_reads">
			<param name="clean_duplicated_reads_select" type="select" label="Filter duplicated reads">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<!-- Single-option checkbox list: the command adds the aggressive flag whenever the
				     string form of this value is not 'None'. -->
				<param name="aggressive" type="select" multiple="True" display="checkboxes" label="Keep only one read per cluster">
					<option value="--aggressive">Yes</option>
				</param>
				<param name="duplication_limit" type="integer" value="70" size="10" label="Limit size difference to use when cleaning duplicated reads"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional low-complexity filter evaluated on a sliding window; window, step and
		     complexity are all forwarded to the command when enabled. -->
		<conditional name="clean_complexity_win">
			<param name="clean_complexity_win_select" type="select" label="Filter low complexity reads computed on a sliding window">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<param name="window" type="integer" value="100" size="10" label="The window size to use when filtering reads based on their complexity"/>
				<param name="step" type="integer" value="5" size="10" label="The window step to use when filtering reads based on their complexity"/>
				<param name="complexity" type="integer" value="40" size="10" label="Minimal complexity/length ratio to use when filtering reads based on their complexity"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional low-complexity filter evaluated on the whole sequence. -->
		<conditional name="clean_complexity_full">
			<param name="clean_complexity_full_select" type="select" label="Filter low complexity reads computed on the whole sequence">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<param name="complexity" type="integer" value="40" size="10" label="Minimal complexity/length ratio to use when filtering reads based on their complexity"/>
			</when>
			<when value="n"/>
		</conditional>
		<!-- Optional quality filter driven by a single threshold value. -->
		<conditional name="clean_quality">
			<param name="clean_quality_select" type="select" label="Filter low quality reads">
				<option value="y">Yes</option>
				<option value="n" selected="true">No</option>
			</param>
			<when value="y">
				<param name="quality_threshold" type="integer" value="35" size="10" label="At least one base pair has to be equal or higher than this value when filtering reads considering their quality"/>
			</when>
			<when value="n"/>
		</conditional>
	</inputs>
  <!-- Datasets produced by the tool: a log, the cleaned reads, and two optional paired-end files. -->
  <outputs>
    <data name="log" format="txt" label="${tool.name} on ${on_string}: log file"/>
    <!-- format_source gives the cleaned reads the same datatype as the "input" dataset;
         it replaces the legacy format="input" spelling. -->
    <data name="output" format_source="input" label="${tool.name} on ${on_string}: cleaned reads"/>
    <!-- The next two datasets exist only when paired-end cleaning is enabled. Their labels name
         the format so the two history items can be told apart. -->
    <data name="out_pe_fasta" format="fasta" label="${tool.name} on ${on_string}: shotgun reads (fasta)">
      <filter>clean_pairends['clean_pairends_select'] == "y"</filter>
    </data>
    <data name="out_pe_qual" format="qual" label="${tool.name} on ${on_string}: shotgun reads (qual)">
      <filter>clean_pairends['clean_pairends_select'] == "y"</filter>
    </data>
  </outputs>
  <tests>
    <!-- NOTE(review): there is no active test. The disabled test below uses names
         (input1_file, output1_file, output2_file) that match none of the inputs or
         outputs declared in this file, so it must be rewritten before being enabled. -->
    <!--
    <test>
      <param name="input1_file" value="3.fastqsanger" ftype="fastqsanger" />
      <output name="output1_file" file="split_pair_reads_1.fastqsanger" />
      <output name="output2_file" file="split_pair_reads_2.fastqsanger" />
    </test>
    -->
  </tests>
  <help>
**What it does**

pyrocleaner_ is a product of the Genotoul bioinformatics platform.

PyroCleaner is intended to clean reads coming from pyrosequencing in order to ease the assembly process.
It enables filtering sequences on multiple copy reads and other criteria such as length, complexity and
number of undetermined bases, which have been shown to correlate with poor quality. It can also
clean sff files of paired-end sequences, generating on one side an sff file with the validated paired-ends
and on the other the shotgun reads.


.. _pyrocleaner: https://mulcyber.toulouse.inra.fr/plugins/mediawiki/wiki/pyrocleaner/index.php/Main_Page

-----

**Input format**

Any sff or fastq file. For example, a fastq file::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR

-----

**Outputs**

A cleaned file in the same format as the input.
If paired-end cleaning is requested, additional fasta and qual files are created with the sequences
trimmed from the spacer.

  </help>
</tool>
<!--
author	g2cmnty@test-web1.g2.bx.psu.edu
date	Thu, 09 Jun 2011 06:09:09 -0400
parents
children
-->