view bbric_disco.xml @ 1:7ecd10051eff

change package repository
author cmonjeau
date Mon, 06 Jul 2015 15:50:38 +0000
parents 1beb3ed9e1e3
children
line wrap: on
line source

<tool id="discosnp_pp" name="discoSnp++" version="2.1.7">
<description>is an efficient tool for detecting SNPs without a reference genome.</description>
  <!-- External packages that must be resolved for this tool: discoSnp++ itself
       and bwa (the reference-mapping option's help text mentions a BWA mapping phase). -->
  <requirements>
    <requirement type="package" version="2.1.7">discoSnp_plus_plus</requirement>
    <requirement type="package" version="0.6.2">bwa</requirement>
  </requirements>
  <!-- Cheetah template for the wrapper invocation (bbric_disco.py, run with python).
       -r receives the generated "datfile" configfile listing the input read sets.
       -l is emitted only when low_complexity is set.
       -t / -T are emitted only when extension is 't' / 'T'; value 'n' adds no flag.
       -G and -M are emitted only when VCF_option.mapping is 'reference'. -->
  <command interpreter="python">
bbric_disco.py
-r ${datfile}
-b $branching_bubbles
-D $deletions
-P $limit_snp
#if $low_complexity
-l
#end if
-k $kmer
#if (str($extension) == 't'):
-t
#end if
#if (str($extension) == 'T'):
-T
#end if
-c $coverage
-C ${maxcoverage}
-d $error_threshold
#if (str($VCF_option.mapping) == 'reference'):
-G ${VCF_option.reference}
-M ${VCF_option.mapping_error}
#end if
  </command>

  <inputs>
    <!-- Read sets to analyse: at least one entry is required. Each entry is
         written as one line of the "datfile" configfile passed with -r. -->
    <repeat name="input_list" title="input files" min="1">
      <param name="input" type="data" format="fasta,fastq,fastq.gz" label="input"/>
    </repeat>

    <!-- Numeric settings, passed as -k, -c, -C and -d respectively. -->
    <param name="kmer" type="integer" label="Size of kmers" value="31" />
    <param name="coverage" type="integer" label="Minimal coverage per read set" value="4" />
    <param name="maxcoverage" type="integer" label="Maximal coverage per read set" value="2147483647" help="default value = 2^31-1" />
    <param name="error_threshold" type="integer" label="Max number of errors per read" value="1" help="Max number of errors per read" />

    <!-- Passed as -b; the option value is forwarded verbatim. -->
    <param name="branching_bubbles" type="select" label="branching strategy">
      <option value="0">variants for which any of the two paths is branching are discarded</option>
      <option value="1">forbid SNPs for which the two paths are branching</option>
      <option value="2">No limitation on branching</option>
    </param>

    <!-- Passed as -D and -P respectively. -->
    <param name="deletions" type="integer" label="deletion size" value="0" help="If different of 0, discoSnp++ will search for deletions of size from 1 to the value included"/>
    <param name="limit_snp" type="integer" label="maximum SNPs per bubble" value="1" help="discoSnp++ will search up to the value SNPs in a unique bubble"/>
    <!-- Adds the -l flag when checked. The initial state comes from "checked";
         the former default="False" attribute is not a recognised param attribute and was dropped. -->
    <param name="low_complexity" type="boolean" checked="false" label="accept low complexity bubbles" />

    <!-- 't' adds -t and 'T' adds -T to the command line; 'n' adds no flag. -->
    <param name="extension" type="select" label="extension strategy">
      <option value="n">extends to 30bp on left and right</option>
      <option value="t">extends left and right until a polymorphism is found (unitigs)</option>
      <option value="T">extends left and right using local assembly (contigs)</option>
    </param>

    <!-- Optional reference mapping: the "reference" branch adds -G and -M to the command line.
         Each <when> value must match an <option> value of the "mapping" select;
         the empty branch was previously misspelled "defaut" and so matched nothing. -->
    <conditional name="VCF_option">
      <param name="mapping" type="select" label="VCF option">
        <option value="default">Do not use reference genome</option>
        <option value="reference">Mapping with a reference genome</option>
      </param>
      <when value="default"/>
      <when value="reference">
        <param name="reference" type="data" format="fasta,fastq" label="Reference genome file" />
        <param name="mapping_error" type="integer" value="4" label="Maximal number of mapping errors" help="during BWA mapping phase" />
      </when>
    </conditional>

  </inputs>

  <outputs>
    <!-- Disabled report output, kept for reference:
    <data name="report" from_work_dir="report.txt" format="txt" label="Output of ${tool.name} on $on_string"/>
    -->
    <!-- Both datasets are picked up by file name from the job working directory. -->
    <data name="vcf"
          format="vcf"
          from_work_dir="coherent.vcf"
          label="VCF of ${tool.name} on $on_string"/>
    <data name="fasta"
          format="fasta"
          from_work_dir="coherent.fasta"
          label="Multifasta of the polymorphisms -  ${tool.name} on $on_string"/>
  </outputs>

 <configfiles>
        <!-- Generated file handed to bbric_disco.py through the -r argument.
             One line per entry of the input_list repeat, written as index::input
             (zero-based index from enumerate; the input presumably renders as the
             dataset path, to be confirmed against the wrapper script). The whitespace
             inside the template is part of the generated file, so it is left untouched. -->
        <configfile name="datfile">
        #for $i, $lib in enumerate ($input_list)
		${i}::${lib.input}
        #end for
 	</configfile>
   </configfiles>
  <help>

**Description**

Software discoSnp is designed for discovering Single Nucleotide Polymorphism (SNP) from raw set(s) of reads obtained with Next Generation Sequencers (NGS).
Note that the number of input read sets is not constrained: it can be one, two, or more. Note also that no other data, such as a reference genome or annotations, is needed.
The software is composed of two modules. The first module, kissnp2, detects SNPs from read sets. The second module, kissreads, enhances the kissnp2 results by computing, per read set and for each SNP found, i/ its mean read coverage and ii/ the (phred) quality of the reads generating the polymorphism.

Note that from release of DiscoSnp++-2.0.6, the tool also detects close SNPs and indels.

-------

.. class:: warningmark 

**Input parameters**

-Sequences files in fasta, fastq or fastq.gz, each allele will be counted in each file individually

-Fasta sequence of a genome, in case you want to map the sequence extensions onto a reference in order to get a compliant VCF

-------

.. class:: warningmark 

**Output parameters**

-VCF file with coordinates on the higher branch sequences or on a reference genome if provided 

-Fasta file with sequence extensions around the SNP. 
 

-------

**Web site**

https://colibread.inria.fr/software/discosnp/

-------

**Integrated by**

Cyril Monjeaud and Fabrice Legeai

GenOuest Bio-informatics Core Facility

UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)

support@genouest.org

If you use this tool in Galaxy, please cite :

`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_

  </help>
<!-- Citation entries for this tool: one DOI entry, and one BibTeX entry for the
     JOBIM 2013 paper that the help text asks Galaxy users to cite. -->
<citations>
<citation type="doi">10.1093/nar/gku1187</citation>
<citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
    author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
    title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
    booktitle = {JOBIM 2013 Proceedings},
    year = {2013},
    url = {https://www.e-biogenouest.org/resources/128},
    pages = {97-106}
    }
</citation>
</citations>
</tool>