view Contra/contra.xml @ 3:94362f37962e

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:43:53 -0400
parents 7564f3b1e675
children
line wrap: on
line source

<tool id="contra_tool" name="Contra Copy number analysis" version="1.0.0">
  <!-- Descriptive text for the tool, complementing the name attribute on the tool element. -->
  <description>: Copy Number Analysis for Targeted Resequencing</description>
  <!-- Package dependencies declared for this tool: bedtools, version 2.11.2. -->
  <requirements>
    <requirement type="package" version="2.11.2">bedtools</requirement>
  </requirements>
  <command interpreter="perl">
    ## Command line for contra_wrapper.pl.
    ## Each CONTRA argument is passed as a quoted PLAYEROPTION:: token; the HTML
    ## output dataset and its extra-files directory are passed as CONTRAOUTPUT::
    ## and CONTRADIR:: tokens. How the wrapper consumes these tokens is defined
    ## in contra_wrapper.pl, not in this file.
    contra_wrapper.pl

    ## Reference genome (-f): a FASTA dataset from the history, or the path
    ## column of the selected built-in reference.
    #if $genomeSource.refGenomeSource == "history":
        "PLAYEROPTION::-f=${genomeSource.ownFile}"
    #else:
        "PLAYEROPTION::-f=${genomeSource.indices.fields.path}"
    #end if

    ## Required files: target regions (-t) and test sample alignment (-s).
    "PLAYEROPTION::-t=$target_file"
    "PLAYEROPTION::-s=$alignment_file"

    ## Control (-c): a dataset from the history, or the path column of the
    ## selected built-in baseline control.
    ## NOTE(review): the tool help states that --bed has to be supplied when the
    ## control is a baseline file, but --bed is only emitted below when the user
    ## sets it under Modify Parameters. Confirm whether contra_wrapper.pl adds
    ## it for built-in baselines.
    #if $controlSource.refControlSource == "history":
        "PLAYEROPTION::-c=${controlSource.control_file}"
    #else:
        "PLAYEROPTION::-c=${controlSource.indices.fields.path}"
    #end if

    ## Optional parameters: only emitted when the user chose to modify them.
    ## Flag-style options are emitted only when their select is set to "true".
    #if $option.option == "modify_parameters":
        "PLAYEROPTION::--numBin=$option.numBin"
        "PLAYEROPTION::--minReadDepth=$option.minReadDepth"
        "PLAYEROPTION::--minNBases=$option.minNbases"

        #if str($option.sam) == "true":
            "PLAYEROPTION::--sam"
        #end if

        #if str($option.bed) == "true":
            "PLAYEROPTION::--bed"
        #end if

        "PLAYEROPTION::--pval=$option.pval"
        "PLAYEROPTION::--sampleName=$option.sampleName"

        #if str($option.nomultimapped) == "true":
            "PLAYEROPTION::--nomultimapped"
        #end if

        #if str($option.plot) == "true":
            "PLAYEROPTION::--plot"
        #end if

        "PLAYEROPTION::--minExon=$option.minExon"
        "PLAYEROPTION::--minControlRdForCall=$option.minControlRdForCall"
        "PLAYEROPTION::--minTestRdForCall=$option.minTestRdForCall"
        "PLAYEROPTION::--minAvgForCall=$option.minAvgForCall"
        "PLAYEROPTION::--maxRegionSize=$option.maxRegionSize"
        ## targetRegionSize is passed exactly once (it was previously repeated
        ## further down in this list).
        "PLAYEROPTION::--targetRegionSize=$option.targetRegionSize"

        ## The flag is spelled --largeDeletion (capital D) to match the option
        ## name documented in this tool's help section.
        #if str($option.largedeletion) == "true":
            "PLAYEROPTION::--largeDeletion"
        #end if

        "PLAYEROPTION::--smallSegment=$option.smallSegment"
        "PLAYEROPTION::--largeSegment=$option.largeSegment"
        "PLAYEROPTION::--lrCallStart=$option.lrCallStart"
        "PLAYEROPTION::--lrCallEnd=$option.lrCallEnd"
        "PLAYEROPTION::--passSize=$option.passSize"
    #end if

    ## HTML output dataset and the directory holding its associated files.
    CONTRAOUTPUT::$html_file
    CONTRADIR::$html_file.files_path

  </command>
	<inputs>

		<!-- Reference genome: either a built-in FASTA from the all_fasta data table
		     or a FASTA dataset from the history. Read by the command template as
		     genomeSource.indices.fields.path or genomeSource.ownFile. -->
		<conditional name="genomeSource">
			<param name="refGenomeSource" type="select" label="Will you select a reference from your history or use a built-in fasta file?">
				<option value="indexed">Use a built-in index</option>
				<option value="history">Use one from the history</option>
			</param>
			<when value="indexed">
				<param name="indices" type="select" label="Select a reference genome">
				  <options from_data_table="all_fasta">
				    <filter type="sort_by" column="2" />
				    <validator type="no_options" message="No indexes are available" />
				  </options>
				</param>
			</when>
			<when value="history">
				<param name="ownFile" type="data" format="fasta" label="Select a reference from history" />
			</when>
		</conditional>

		<!-- Required input files. -->
		<param name="target_file" type="data" format="bed" label="Target region definition file" help="BED format (-t, --target)." optional="false" />
		<param name="alignment_file" type="data" format="bam,sam" label="Alignment file for the test sample" help="BAM or SAM format (-s, --test)." optional="false" />

		<!-- Control: either a built-in baseline from the baseline_files data table
		     or a dataset from the history. Read by the command template as
		     controlSource.indices.fields.path or controlSource.control_file. -->
		<conditional name="controlSource">
			<param name="refControlSource" type="select" label="Will you select a reference from your history or use a built-in control file?">
				<option value="indexed">Use a built-in control</option>
				<option value="history">Use one from the history</option>
			</param>
			<when value="indexed">
				<param name="indices" type="select" label="Select a baseline control" help="The CONTRA documentation states that the --bed option has to be supplied when the control is a baseline file; it can be enabled under Modify Parameters.">
				  <options from_data_table="baseline_files">
				    <filter type="sort_by" column="2" />
				    <validator type="no_options" message="No files available" />
				  </options>
				</param>
			</when>
			<when value="history">
				<param name="control_file" type="data" format="bam,sam,bed,tabular" label="Control sample or baseline file" help="Alignment file for the control sample (BAM/SAM) or a baseline file (BED) (-c, --control). For a baseline file, the --bed option has to be supplied; it can be enabled under Modify Parameters." optional="false" />
			</when>
		</conditional>

		<!-- Optional CONTRA parameters. They are only passed on the command line
		     when "Modify Parameters" is selected; there is no when-branch for
		     default_parameters. -->
		<conditional name="option">
			<param name="option" type="select" label="Optional Parameters" optional="true">
				<option value="default_parameters" selected="true">Default Parameters</option>
				<option value="modify_parameters">Modify Parameters</option>
			</param>
			<when value="modify_parameters">
				<param name="numBin" type="integer" value="20" label="Number of bins (--numBin)" help="Number of bins to group the regions." optional="true" />
				<param name="minReadDepth" type="integer" value="10" label="Minimum read depth (--minReadDepth)" help="Threshold for minimum read depth for each base." optional="true" />
				<param name="minNbases" type="integer" value="10" label="Minimum number of bases (--minNBases)" help="Threshold for minimum number of bases for each target region." optional="true" />
				<param name="sam" type="select" label="sam" help="Set to true if the test and control samples are in SAM format (BAM is assumed otherwise)." optional="true">
					<option value="true">true</option>
					<option value="false" selected="true">false</option>
				</param>
				<param name="bed" type="select" label="bed" help="Set to true if the control is a baseline file in BED format." optional="true">
					<option value="true">true</option>
					<option value="false" selected="true">false</option>
				</param>
				<param name="pval" type="float" value="0.05" label="P-value threshold (--pval)" help="Threshold for filtering, based on adjusted p-values. Only regions that pass this threshold are included in the VCF file." optional="true" />
				<param name="sampleName" value="Contra_Output" type="text" label="Sample name (--sampleName)" help="Name to be appended to the front of the default output name." optional="true" />
				<param name="nomultimapped" type="select" label="no multimapped" help="Remove multi-mapped reads (using SAMtools with mapping quality above 0)." optional="true">
					<option value="true">true</option>
					<option value="false" selected="true">false</option>
				</param>
				<param name="plot" type="select" label="plot" help="Include plots of the log-ratio distribution for each bin in the output folder." optional="true">
					<option value="true">true</option>
					<option value="false" selected="true">false</option>
				</param>
				<param name="minExon" type="integer" value="2000" label="Minimum exons per bin (--minExon)" help="Minimum number of exons in one bin; bins with fewer exons are merged into the adjacent bins." optional="true" />
				<param name="minControlRdForCall" type="integer" value="5" label="Minimum control read depth for call (--minControlRdForCall)" optional="true" />
				<param name="minTestRdForCall" type="integer" value="0" label="Minimum test read depth for call (--minTestRdForCall)" optional="true" />
				<param name="minAvgForCall" type="integer" value="20" label="Minimum average coverage for call (--minAvgForCall)" optional="true" />
				<param name="maxRegionSize" type="integer" value="0" label="Maximum region size (--maxRegionSize)" help="Maximum region size in the target region, used to break large regions into smaller regions. 0 means no breakdown." optional="true" />
				<param name="targetRegionSize" type="integer" value="200" label="Target region size (--targetRegionSize)" help="Target region size for breakdown (used if the maximum region size is non-zero)." optional="true" />
				<param name="largedeletion" type="select" label="large deletion" help="Run the large deletion analysis (CBS). The DNAcopy R library must be installed." optional="true">
					<option value="true">true</option>
					<option value="false" selected="true">false</option>
				</param>

				<param name="smallSegment" type="integer" value="1" label="Small segment size (--smallSegment)" help="CBS segment size for calling large variations." optional="true" />
				<param name="largeSegment" type="integer" value="25" label="Large segment size (--largeSegment)" help="CBS segment size for calling large variations." optional="true" />
				<param name="lrCallStart" type="float" value="-0.3" label="Log-ratio call start (--lrCallStart)" help="Start of the log-ratio range that will be used to call CNV." optional="true" />
				<param name="lrCallEnd" type="float" value="0.3" label="Log-ratio call end (--lrCallEnd)" help="End of the log-ratio range that will be used to call CNV." optional="true" />
				<param name="passSize" type="float" value="0.5" label="Pass size (--passSize)" help="Size of exons that passed the p-value threshold compared to the original exon size." optional="true" />

			</when>
		</conditional>
	</inputs>
	<outputs>
		<!-- Single HTML dataset; the command template passes it and its
		     files_path directory to the wrapper. -->
		<data
			format="html"
			name="html_file"
			label="Contra Output" />
	</outputs>
	<help>
|


**Reference**
	http://contra-cnv.sourceforge.net/

-----

**What it does**

CONTRA is a tool for copy number variation (CNV) detection in targeted resequencing data, such as data from whole-exome capture. CONTRA calls copy number gains and losses for each target region. Its key strategies include the use of base-level log-ratios to remove GC-content bias, correction for an imbalanced library size effect on log-ratios, and the estimation of log-ratio variations via binning and interpolation. It takes standard alignment formats (BAM/SAM) as input and writes its output in variant call format (VCF 4.0) for easy integration with other next-generation sequencing analysis packages.


-----
 
**Required Parameters**

::

  -t, --target         Target region definition file [BED format] 

  -s, --test           Alignment file for the test sample [BAM/SAM] 

  -c, --control        Alignment file for the control sample 
                       [BAM/SAM/BED – baseline file]

  --bed                This option has to be supplied when the control
                       is a baseline file.

  -f, --fasta          Reference genome [FASTA]

  -o, --outFolder      The folder name (and its path) in which to store the
                       output of the analysis (this new folder will be created –
                       an error occurs if the folder already exists)

-----

**Optional Parameters**

::

  --numBin              Number of bins to group the regions. User can 
                        specify multiple experiments with different numbers
                        of bins (comma separated). [Default: 20] 

  --minReadDepth        The threshold for minimum read depth for each base 
                        (see Step 2 in CONTRA workflow) [Default: 10] 

  --minNBases           The threshold for minimum number of bases for each 
                        target region (see Step 2 in CONTRA workflow) 
                        [Default: 10] 

  --sam                 If the specified test and control samples are in 
                        SAM format. [Default: False] (It will always take 
                        BAM samples as default) 

  --bed                 If specified, control will be a baseline file in 
                        BED format. [Default: False] 
                        Please refer to the Baseline Script section for 
                        instructions on how to create baseline files from a 
                        set of BAM files. A set of baseline files from 
                        different platforms has also been provided on the 
                        CONTRA download page. 

  --pval                The p-value threshold for filtering. Based on Adjusted 
                        P-Values. Only regions that pass this threshold will 
                        be included in the VCF file. [Default: 0.05] 

  --sampleName          The name to be appended to the front of the default output 
                        name. By default, there will be nothing appended. 

  --nomultimapped       The option to remove multi-mapped reads 
                        (using SAMtools with mapping quality > 0). 
                        [default: FALSE] 

  -p, --plot            If specified, plots of log-ratio distribution for each 
                        bin will be included in the output folder [default: FALSE] 

  --minExon             Minimum number of exons in one bin (if a bin contains
                        fewer exons than this number, it will be merged into
                        the adjacent bins) [Default : 2000] 

  --minControlRdForCall Minimum Control ReadDepth for call [Default: 5] 

  --minTestRdForCall    Minimum Test ReadDepth for call [Default: 0] 

  --minAvgForCall       Minimum average coverage for call [Default: 20] 

  --maxRegionSize       Maximum region size in target region (for breaking 
                        large regions into smaller regions. By default, 
                        maxRegionSize=0 means no breakdown). [Default : 0] 

  --targetRegionSize    Target region size for breakdown (if maxRegionSize 
                        is non-zero) [Default: 200] 

  -l, --largeDeletion   If specified, CONTRA will run large deletion analysis (CBS).
                        User must have DNAcopy R-library installed to run the 
                        analysis. [False] 

  --smallSegment        CBS segment size for calling large variations [Default : 1] 

  --largeSegment        CBS segment size for calling large variations [Default : 25] 

  --lrCallStart         Log ratios start range that will be used to call CNV 
                        [Default : -0.3] 

  --lrCallEnd           Log ratios end range that will be used to call CNV 
                        [Default : 0.3] 

  --passSize            Size of exons that passed the p-value threshold compared 
                        to the original exon size [Default: 0.5] 
	</help>
</tool>