Mercurial > repos > sblanck > mpagenomics

diff preprocess.xml @ 0:4d539083cf7f draft
planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233-dirty
author: sblanck
date: Tue, 12 May 2020 10:40:36 -0400
children: 3fcbb8030fcc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocess.xml	Tue May 12 10:40:36 2020 -0400
@@ -0,0 +1,159 @@
+<tool id="preprocess" name="Data Normalization" force_history_refresh="True" version="1.1.0">
+  	<requirements>
+    <!--requirement type="set_environment">R_SCRIPT_PATH</requirement-->
+    <requirement type="package" version="1.1.2">mpagenomics</requirement>
+   </requirements>
+	<!--command interpreter="python"-->
+    <command>
+    	<![CDATA[ 
+        Rscript 
+        ${__tool_directory__}/preprocess.R	
+		--summary '$summary' 
+		--new_file_path '$__new_file_path__'  
+		--inputcdffull_name '$inputcdffull.name' 
+		--inputufl_name '$inputufl.name' 
+		--inputugp_name '$inputugp.name' 
+		--inputacs_name '$inputacs.name'
+		--inputcdffull '$inputcdffull' 
+		--inputufl '$inputufl' 
+		--inputugp '$inputugp' 
+		--inputacs '$inputacs'
+		--dataSetName '$datasetName'
+  	#if $settings.settingsType == "tumor":
+  	--tumorcsv '$tumorcsv' 
+  	#end if
+  	#if $settings.settingsType == "standard":
+  	--tumorcsv 'none'
+  	#end if
+  	 --settingsType '$settings.settingsType' 
+	 --outputgraph '$outputgraph' 
+	 --zipfigures '$zipfigures' 
+	 --outputlog '$outputlog' 
+	 --log '$log' 
+	 --user_id '$__user_id__'
+	 --input "#for $input in $inputs# $input;$input.name, #end for#"
+    	]]>
+     
+  </command>
+  <inputs>
+  	<param name="datasetName" type="text" label="Dataset Name"/>
+  	<param name="inputs" type="data" format="cel" multiple="True" label="Cel files dataset" help="Cel files dataset previously uploaded with the Multiple File Datasets tool."/>
+    <param name="inputcdffull" type="data" format="cdf" label="cdf file" help=".cdf file name must comply with the following format : &lt; chiptype &gt;,&lt; tag &gt;.cdf (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf)." />
+    <param name="inputufl" type="data" format="ufl" label="ufl file" help=".ufl file name must start with  &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl)."/>
+    <param name="inputugp" type="data" format="ugp" label="ugp file" help=".ugp file name must start with  &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ugp)."/>
+    <param name="inputacs" type="data" format="acs" label="acs file" help=".acs file name must start with  &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,HB20080710.acs)."/>
+	<conditional name="settings">
+      <param name="settingsType" type="select" label="Reference">
+        <option value="standard">Study without reference</option>
+        <option value="tumor">Normal-tumor study with TumorBoost</option>
+      </param>
+      <when value="standard" />
+      <when value="tumor">
+        <param name="tumorcsv" type="data" format="csv" label="TumorBoost csv file" help="Normal-tumor csv file. See below for more information."/>
+      </when>
+    </conditional>
+    <!--param name="outputgraph" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output figures" /-->
+    <!--param name="outputlog" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output log" /-->
+	<param name="outputgraph" type="select" label="Output figures">
+        <option value="TRUE">Yes</option>
+        <option value="FALSE">No</option>
+      </param>
+    <param name="outputlog" type="select" label="Output log">
+        <option value="TRUE">Yes</option>
+        <option value="FALSE">No</option>
+    </param>
+    <!--param name="chipType" type="text" label="chipType" /-->
+    <!--param name="workspace" type="text" label="Workspace"/-->
+  </inputs>
+  
+  <outputs>
+  	<!-- Would like to make this hidden or not appear all together, but
+  	     variable outputs require a primary dataset. If hidden refresh 
+  	     doesn't occur. 
+  	-->
+    <data format="dsf" name="summary" label="Dataset summary file of ${datasetName}" />
+    <data format="zip" name="zipfigures" label="figures of normalization of ${datasetName}">
+    	<filter>outputgraph == "TRUE"</filter>	
+    </data>    
+    <data format="log" name="log" label="log of normalization ${datasetName}">
+    	<filter>outputlog == "TRUE"</filter>
+    </data>
+  </outputs>
+  
+  <stdio>
+    <exit_code range="1:"   level="fatal"   description="See logs for more details" />
+   </stdio>
+   
+     <help>
+  
+**What it does**
+     	
+This preprocessing step consists in a correction of biological and technical biaises due to the experiment. Raw data from Affymetrix arrays are provided in different CEL files. These data must be normalized before statistical analysis.
+The pre-processing is proposed as a wrapper of aroma.* packages (using CRMAv2 and TumorBoost when appropriate). Note that this implies that the pre-processing step is only available for Affymetrix arrays.
+
+-----
+     	
+**Chip file naming conventions**
+      	
+Chip filenames must strictly follow the following rules :
+     	
+- *.cdf* filename must comply with the following format : &lt; chiptype &gt;,&lt; tag &gt;.cdf (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf). Note the use of a comma (not a point) between &lt;chiptype&gt; and the tag "Full".
+
+- *.ufl* filename must start with  &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl).
+
+- *.ugp* filename must start with  &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ugp).   	
+
+- *.acs* file name must start with  &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,HB20080710.acs).
+     	
+-----
+     	
+**Normal-tumor study with TumorBoost**
+     	
+In cases where normal (control) samples match to tumor samples, normalization can be improved using TumorBoost. In this case, a normal-tumor csv file must be provided :
+
+	- The first column contains the names of the files corresponding to normal samples of the dataset.
+     	 
+	- The second column contains the names of the tumor samples files. 
+     	
+	- Column names of these two columns are respectively normal and tumor.
+     	
+	- Columns are separated by a comma.
+     	
+	- *Extensions of the files (.CEL for example) should be removed*
+
+
+     	
+**Example** 
+
+Let 6 .cel files in the dataset studied (3 patients, each of them being represented by a couple of normal and tumor cel files.) ::
+     	
+     	patient1_normal.cel
+     	patient1_tumor.cel
+     	patient2_normal.cel
+     	patient2_tumor.cel
+     	patient3_normal.cel 
+     	patient3_tumor.cel
+      	
+
+The csv file should look like this ::
+     	
+     	normal,tumor
+     	patient1_normal,patient1_tumor
+     	patient2_normal,patient2_tumor
+     	patient3_normal,patient3_tumor
+
+     	
+-----
+     	
+**Citation**
+	
+When using this tool, please cite : 
+
+`Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint &lt;http://fr.arxiv.org/abs/1401.5035&gt;`_
+     	
+As CRMAv2 normalization is used, please also cite `H. Bengtsson, P. Wirapati, and T. P. Speed. A single-array preprocessing method for estimating full-resolution raw copy numbers from all Affymetrix genotyping arrays including GenomeWideSNP 5 &amp; 6. Bioinformatics, 5(17):2149–2156, 2009. &lt;http://bioinformatics.oxfordjournals.org/content/25/17/2149.short&gt;`_
+
+When using TumorBoost to improve normalization in a normal-tumor study, please cite `H. Bengtsson, P. Neuvial, and T. P. Speed. TumorBoost: Normalization of allele-specific tumor copy numbers from a single pair of tumor-normal genotyping microarrays. BMC Bioinformatics, 11, 2010 &lt;http://www.biomedcentral.com/1471-2105/11/245&gt;`_
+
+  </help>
+</tool>
author	sblanck
date	Tue, 12 May 2020 10:40:36 -0400
parents
children	3fcbb8030fcc