changeset 4:f7a84d31bd83 draft

Uploaded
author bzeitouni
date Mon, 11 Jun 2012 12:30:38 -0400
parents 861783bb65d2
children ba8c5e544948
files SVDetect_run_parallel.xml
diffstat 1 files changed, 324 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SVDetect_run_parallel.xml	Mon Jun 11 12:30:38 2012 -0400
@@ -0,0 +1,324 @@
+<tool id="svdetect_run_parallel" name="Detect clusters of anomalously mapped pairs">
+
+<description>and identify structural variants</description>
+
+<!--
+  Command line for the SVDetect_run_parallel.pl wrapper (Cheetah template).
+  The step keywords (linking, filtering, links2SV, links2circos, links2bed)
+  are emitted only when the matching form option is enabled; each conversion
+  step is followed by the path of the dataset that receives its result.
+  The generated configuration file, the log dataset and the sample name are
+  always passed.
+  Notes inside the template use Cheetah "##" comments rather than XML
+  comments, so the rendered command does not depend on how the XML parser
+  treats comment nodes inside the element text.
+-->
+<command interpreter="perl">SVDetect_run_parallel.pl
+
+#if $getLinks.linking == "linking"
+linking
+## Disabled, the matching output dataset is commented out: -out1 '$links_file'
+#end if
+#if $getFilteredLinks.filtering == "filtering"
+filtering
+## Disabled, the matching output dataset is commented out: -out2 '$flinks_file'
+#if str($getFilteredLinks.links2SV) == "create"
+links2SV
+-out3 '$sv_file'
+#end if
+#if $getFilteredLinks.file_conversion.file_conversion_select == "convert" and str($getFilteredLinks.file_conversion.links2circos) == "create"
+links2circos
+-out4 '$circos_file'
+#end if
+#if $getFilteredLinks.file_conversion.file_conversion_select == "convert" and str($getFilteredLinks.file_conversion.links2bed) == "create"
+links2bed
+-out5 '$bed_file'
+#end if
+#end if
+-conf '$config_file'
+-l '$log_file'
+-N '$sample_name'
+
+</command>
+
+<!--
+  Form definition. Parameter and conditional names are referenced from the
+  command and configfile Cheetah templates of this tool, so names, option
+  values and nesting must stay in sync with those templates.
+-->
+<inputs>
+	<param name="sample_name" type="text" value="sample" label="Sample Name"/>
+	<param name="mates_file" format="bam" type="data" label="Input BAM file (.ab.bam)"/>
+	<param name="cmap_file" format="len" type="data" label="Chromosomes list file (.len)" help="Tabulated file format with Chromosome ID (integer from 1), name and length"/>
+	<!-- Both paired-end library types map to the same orientation value (FR). -->
+	<param name="mates_orientation" type="select" label="Type of sequencing technology and libraries">
+		<option value="FR">Illumina paired-ends</option>
+		<option value="RF">Illumina mate-pairs</option>
+		<option value="FR">SOLiD paired-ends</option>
+		<option value="RR">SOLiD mate-pairs</option>
+	</param>
+	<param name="read1_length" type="integer" size="10" value="50" label="Read 1 length (bp)" help="Length of the first read in a pair (left read)"/>
+	<param name="read2_length" type="integer" size="10" value="50" label="Read 2 length (bp)" help="Length of the second read in a pair (right read)"/>
+	<param name="sv_type" type="select" label="Type of SV to detect">
+		<option value="all">all types of SVs</option>
+		<option value="intra">intrachromosomal SVs only</option>
+		<option value="inter">interchromosomal SVs only</option>
+	</param>
+
+	<!-- Linking step: its parameters are only shown when the step is enabled. -->
+	<conditional name="getLinks">
+		<param name="linking" type="select" label="Linking procedure" help="Detection and isolation of links">
+			<option value="linking">Yes</option>
+			<option value="">No, already done</option>
+		</param>
+		<when value="">
+			<!-- Linking skipped: no extra parameters. -->
+		</when>
+		<when value="linking">
+			<param name="splitmate" label="Do you want to split the original mate file per chromosome for parallel computing?" type="boolean" truevalue="split" falsevalue="do_not_split" checked="True" help="Untick it if already done"/>
+			<param name="window_size" type="integer" size="20" value="3000" label="Window size (bp)" help="Equal to at least 2µ+2√2σ"/>
+			<param name="step_length" type="integer" size="20" value="250" label="Step length size (bp)" help="Equal to 1/2 or 1/4 of the window size"/>
+		</when>
+	</conditional>
+
+	<!--
+	  Filtering step. The strand, order and insert-size filters are nested:
+	  each one is only offered when the previous one is enabled.
+	-->
+	<conditional name="getFilteredLinks">
+		<param name="filtering" type="select" label="Filtering procedure" help="Filtering of links according to different parameters and thresholds">
+			<option value="filtering">Yes</option>
+			<option value="">No</option>
+		</param>
+		<when value="">
+			<!-- Filtering skipped: no extra parameters. -->
+		</when>
+		<when value="filtering">
+
+			<param name="splitlink" label="Do you want to split the original link file per chromosome for parallel computing?" type="boolean" truevalue="split" falsevalue="do_not_split" checked="False" help="Untick it if (the linking is) already done"/>
+			<param name="chromosomes" type="text" size="20" label="List of chromosome names to keep or exclude"/>
+			<param name="nb_pairs_threshold" type="integer" size="20" value="5" label="Minimum number of pairs in a cluster"/>
+
+			<conditional name="filter1">
+				<param name="strand_filtering" type="select" label="Strand filtering procedure">
+					<option value="strand">Yes</option>
+					<option value="">No</option>
+				</param>
+				<when value="">
+					<!-- Strand filtering skipped: no extra parameters. -->
+				</when>
+				<when value="strand">
+
+					<conditional name="filter2">
+						<param name="order_filtering" type="select" label="Order filtering procedure">
+							<option value="order">Yes</option>
+							<option value="">No</option>
+						</param>
+						<when value="">
+							<!-- Order filtering skipped: no extra parameters. -->
+						</when>
+						<when value="order">
+
+							<conditional name="filter3">
+								<param name="insert_size_filtering" type="select" label="Insert-size filtering procedure">
+									<option value="insert">Yes</option>
+									<option value="">No</option>
+								</param>
+								<when value="">
+									<!-- Insert-size filtering skipped: no extra parameters. -->
+								</when>
+								<when value="insert">
+									<param name="indel_sigma_threshold" type="float" size="20" value="3" label="Minimal number of sigma fold for the insert size filtering and to call insertions and deletions"/>
+									<param name="dup_sigma_threshold" type="float" size="20" value="3" label="Minimal number of sigma fold for the insert size filtering to call tandem duplications"/>
+									<param name="singleton_sigma_threshold" type="float" size="20" value="4" label="Minimal number of sigma fold for the insert size filtering to call singletons" help="for Illumina mate-pairs only"/>
+								</when>
+							</conditional>
+
+							<param name="mu_length" type="integer" size="20" value="3000" label="Mean insert size value (µ) of normally mapped mate-pairs, in bp"/>
+							<param name="sigma_length" type="integer" size="20" value="250" label="Calculated sd value (σ) from the distribution of normally mapped mate-pairs, in bp"/>
+							<param name="nb_pairs_order_threshold" type="integer" size="20" value="2" label="Minimal number of pairs in a subgroup of paired-end reads for balanced events"/>
+						</when>
+					</conditional>
+
+					<param name="final_score_threshold" type="float" size="20" value="1.0" label="Minimal final filtering score for calling SVs" help="A value of 1 means all the pairs in a cluster were consistent between each other after applying filters"/>
+				</when>
+			</conditional>
+
+			<param name="links2SV" label="Do you want to have filtered links in a tabulated file format showing significant SVs?" type="boolean" truevalue="create" falsevalue="do_not_create" checked="True"/>
+
+			<!-- Optional conversion of the filtered links to Circos and/or BED. -->
+			<conditional name="file_conversion">
+				<param name="file_conversion_select" type="select" label="Output file conversion" help="Converts filtered links to Circos/BED files format for graphical view of SVs">
+					<option value="do_not_convert">No</option>
+					<option value="convert">Yes</option>
+				</param>
+				<when value="do_not_convert">
+					<!-- No conversion: no extra parameters. -->
+				</when>
+				<when value="convert">
+					<param name="links2circos" label="Converts the link list to the Circos link format" type="boolean" truevalue="create" falsevalue="do_not_create" checked="True"/>
+					<param name="links2bed" label="Converts the link list to the UCSC BED format" type="boolean" truevalue="create" falsevalue="do_not_create" checked="False"/>
+					<param name="organism_id" type="text" size="10" value="hs" label="Organism ID"/>
+					<!-- One entry per colour; the same list feeds both the Circos and the BED colour codes. -->
+					<repeat name="color_code" title="Color-code" min="1" max="7">
+						<param name="color" type="select" label="Color">
+							<option value="grey">grey</option>
+							<option value="black">black</option>
+							<option value="blue">blue</option>
+							<option value="green">green</option>
+							<option value="purple">purple</option>
+							<option value="orange">orange</option>
+							<option value="red">red</option>
+						</param>
+						<param name="interval" type="text" value="1,3" label="Interval"/>
+					</repeat>
+				</when>
+			</conditional>
+		</when>
+	</conditional>
+</inputs>
+
+
+<!--
+  Output datasets. The SV, Circos and BED datasets are only created when the
+  filtering step and the matching option are enabled in the form; the log
+  dataset is always created.
+-->
+<outputs>
+	<!-- Outputs for the raw and filtered link files are currently disabled:
+	<data format="txt" name="links_file" label="svdetect.links">
+		<filter>getLinks['linking']=="linking"</filter>
+	</data>
+	<data format="txt" name="flinks_file" label="svdetect.links.filtered">
+		<filter>getFilteredLinks['filtering']=="filtering"</filter>
+	</data>
+	-->
+	<data name="sv_file" format="sv" label="${sample_name}.sv">
+		<filter>getFilteredLinks['filtering']=="filtering" and getFilteredLinks['links2SV'] is True</filter>
+	</data>
+	<data name="circos_file" format="segdup" label="${sample_name}.segdup">
+		<filter>getFilteredLinks['filtering']=="filtering" and getFilteredLinks['file_conversion']['file_conversion_select']=="convert" and getFilteredLinks['file_conversion']['links2circos'] is True</filter>
+	</data>
+	<data name="bed_file" format="bed" label="${sample_name}.bed">
+		<filter>getFilteredLinks['filtering']=="filtering" and getFilteredLinks['file_conversion']['file_conversion_select']=="convert" and getFilteredLinks['file_conversion']['links2bed'] is True</filter>
+	</data>
+	<data name="log_file" format="txt" label="${sample_name}.svdetect_run.log"/>
+</outputs>
+
+
+
+<configfiles>
+	<!--
+	  Cheetah template for the configuration file handed to the wrapper
+	  through the -conf option of the command.
+	  Sections written:
+	    general   : always; input files, read lengths, orientation, SV type,
+	                working directories and a fixed num_threads of 8.
+	    detection : only when the linking step is enabled.
+	    filtering : only when the filtering step is enabled; the strand, order
+	                and insert-size settings follow the nesting of the form.
+	    circos    : only when conversion and the Circos option are enabled;
+	                colour names are written as selected.
+	    bed       : only when conversion and the BED option are enabled;
+	                each colour name is written as an R,G,B triplet.
+	  The text between the configfile tags is emitted as is, so its line
+	  layout and whitespace must not be reformatted.
+	  NOTE(review): tmp_dir and output_dir point at a fixed "svdetect"
+	  directory under $__new_file_path__; confirm that concurrent jobs
+	  cannot collide there.
+	-->
+	<configfile name="config_file">
+&lt;general&gt;
+input_format = bam
+sv_type = ${sv_type}
+mates_orientation=${mates_orientation}
+read1_length=${read1_length}
+read2_length=${read2_length}
+mates_file=${mates_file}
+cmap_file=${cmap_file}
+tmp_dir=$__new_file_path__/svdetect/tmp
+output_dir=$__new_file_path__/svdetect
+num_threads=8
+&lt;/general&gt; 
+
+#if $getLinks.linking == "linking"
+&lt;detection&gt;
+#if str($getLinks.splitmate) == "split"
+split_mate_file=1
+#else
+split_mate_file=0
+#end if
+window_size=${getLinks.window_size}
+step_length=${getLinks.step_length}
+&lt;/detection&gt; 
+#end if
+
+#if $getFilteredLinks.filtering == "filtering"
+&lt;filtering&gt;
+#if str($getFilteredLinks.splitlink) == "split"
+split_link_file=1
+#else
+split_link_file=0
+#end if
+#if str($getFilteredLinks.chromosomes) != ""
+chromosomes=${getFilteredLinks.chromosomes}
+#end if
+nb_pairs_threshold=${getFilteredLinks.nb_pairs_threshold}
+#if $getFilteredLinks.filter1.strand_filtering == "strand"
+strand_filtering=1
+final_score_threshold=${getFilteredLinks.filter1.final_score_threshold}
+#if $getFilteredLinks.filter1.filter2.order_filtering == "order"
+order_filtering=1
+mu_length=${getFilteredLinks.filter1.filter2.mu_length}
+sigma_length=${getFilteredLinks.filter1.filter2.sigma_length}
+nb_pairs_order_threshold=${getFilteredLinks.filter1.filter2.nb_pairs_order_threshold}
+#if $getFilteredLinks.filter1.filter2.filter3.insert_size_filtering == "insert"
+insert_size_filtering=1
+indel_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.indel_sigma_threshold}
+dup_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.dup_sigma_threshold}
+singleton_sigma_threshold=${getFilteredLinks.filter1.filter2.filter3.singleton_sigma_threshold}
+#else
+insert_size_filtering=0
+#end if
+#else
+order_filtering=0
+#end if
+#else
+strand_filtering=0
+#end if
+&lt;/filtering&gt; 
+#end if
+
+#if $getFilteredLinks.filtering == "filtering"
+#if $getFilteredLinks.file_conversion.file_conversion_select == "convert"
+#if str($getFilteredLinks.file_conversion.links2circos) == "create"
+&lt;circos&gt;
+organism_id=${getFilteredLinks.file_conversion.organism_id}
+&lt;colorcode&gt;
+#for $color_repeat in $getFilteredLinks.file_conversion.color_code
+${color_repeat.color}=${color_repeat.interval}
+#end for
+&lt;/colorcode&gt;
+&lt;/circos&gt;
+#end if
+#if str($getFilteredLinks.file_conversion.links2bed) == "create"
+&lt;bed&gt;
+&lt;colorcode&gt;
+#for $color_repeat in $getFilteredLinks.file_conversion.color_code
+#if str($color_repeat.color)== "grey"
+190,190,190=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "black"
+0,0,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "blue"
+0,0,255=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "green"
+0,255,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "purple"
+153,50,205=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "orange"
+255,140,0=${color_repeat.interval}
+#end if
+#if str($color_repeat.color)== "red"
+255,0,0=${color_repeat.interval}
+#end if
+#end for
+&lt;/colorcode&gt;
+&lt;/bed&gt;
+#end if
+#end if
+#end if	
+	</configfile>
+</configfiles>
+
+  <help>
+**What it does**
+
+SVDetect - Version : 0.8
+
+Parallel version (nCPU=8)
+
+SVDetect is an application for the isolation and type prediction of intra- and inter-chromosomal rearrangements from paired-end/mate-pair sequencing data produced by high-throughput sequencing technologies.
+
+This tool aims to identify structural variations (SVs) with both clustering and sliding-window strategies, and to help visualize them at the genome scale.
+SVDetect is compatible with SOLiD and Illumina (>=1.3) reads.
+
+The manual is available at http://svdetect.sourceforge.net/Site/Manual.html
+
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of SVDetect.
+
+  </help>
+
+</tool>