Mercurial > repos > sblanck > mpagenomics
comparison preprocess.xml @ 0:4d539083cf7f draft
planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233-dirty
author | sblanck |
---|---|
date | Tue, 12 May 2020 10:40:36 -0400 |
parents | |
children | 3fcbb8030fcc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4d539083cf7f |
---|---|
1 <tool id="preprocess" name="Data Normalization" force_history_refresh="True" version="1.1.0"> | |
2 <requirements> <!-- Tool dependencies: the mpagenomics package, version 1.1.2. --> | |
3 <!--requirement type="set_environment">R_SCRIPT_PATH</requirement--> | |
4 <requirement type="package" version="1.1.2">mpagenomics</requirement> | |
5 </requirements> | |
6 <!-- Command line: runs preprocess.R through Rscript. The script path is quoted so that install directories containing spaces work, and the dataset nested in the "settings" conditional is referenced by its fully qualified name ($settings.tumorcsv). Disabled legacy declaration: command interpreter="python" --> | |
7 <command> | |
8 <![CDATA[ | |
9 Rscript | |
10 '${__tool_directory__}/preprocess.R' | |
11 --summary '$summary' | |
12 --new_file_path '$__new_file_path__' | |
13 --inputcdffull_name '$inputcdffull.name' | |
14 --inputufl_name '$inputufl.name' | |
15 --inputugp_name '$inputugp.name' | |
16 --inputacs_name '$inputacs.name' | |
17 --inputcdffull '$inputcdffull' | |
18 --inputufl '$inputufl' | |
19 --inputugp '$inputugp' | |
20 --inputacs '$inputacs' | |
21 --dataSetName '$datasetName' | |
22 #if $settings.settingsType == "tumor": | |
23 --tumorcsv '$settings.tumorcsv' | |
24 #end if | |
25 #if $settings.settingsType == "standard": | |
26 --tumorcsv 'none' | |
27 #end if | |
28 --settingsType '$settings.settingsType' | |
29 --outputgraph '$outputgraph' | |
30 --zipfigures '$zipfigures' | |
31 --outputlog '$outputlog' | |
32 --log '$log' | |
33 --user_id '$__user_id__' | |
34 --input "#for $input in $inputs# $input;$input.name, #end for#" | |
35 ]]> | |
36 | |
37 </command> | |
38 <inputs> <!-- Tool parameters: dataset name, CEL files, chip annotation files (cdf, ufl, ugp, acs), reference mode and optional outputs. Angle brackets in help texts are escaped because a literal "less than" sign is not allowed inside an attribute value. --> | |
39 <param name="datasetName" type="text" label="Dataset Name"/> | |
40 <param name="inputs" type="data" format="cel" multiple="True" label="Cel files dataset" help="Cel files dataset previously uploaded with the Multiple File Datasets tool."/> | |
41 <param name="inputcdffull" type="data" format="cdf" label="cdf file" help=".cdf file name must comply with the following format : &lt; chiptype &gt;,&lt; tag &gt;.cdf (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf)." /> | |
42 <param name="inputufl" type="data" format="ufl" label="ufl file" help=".ufl file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl)."/> | |
43 <param name="inputugp" type="data" format="ugp" label="ugp file" help=".ugp file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ugp)."/> | |
44 <param name="inputacs" type="data" format="acs" label="acs file" help=".acs file name must start with &lt; chiptype &gt;,&lt; tag &gt; (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,HB20080710.acs)."/> | |
45 <conditional name="settings"> | |
46 <param name="settingsType" type="select" label="Reference"> | |
47 <option value="standard">Study without reference</option> | |
48 <option value="tumor">Normal-tumor study with TumorBoost</option> | |
49 </param> | |
50 <when value="standard" /> | |
51 <when value="tumor"> | |
52 <param name="tumorcsv" type="data" format="csv" label="TumorBoost csv file" help="Normal-tumor csv file. See below for more information."/> | |
53 </when> | |
54 </conditional> | |
55 <!--param name="outputgraph" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output figures" /--> | |
56 <!--param name="outputlog" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output log" /--> | |
57 <param name="outputgraph" type="select" label="Output figures"> | |
58 <option value="TRUE">Yes</option> | |
59 <option value="FALSE">No</option> | |
60 </param> | |
61 <param name="outputlog" type="select" label="Output log"> | |
62 <option value="TRUE">Yes</option> | |
63 <option value="FALSE">No</option> | |
64 </param> | |
65 <!--param name="chipType" type="text" label="chipType" /--> | |
66 <!--param name="workspace" type="text" label="Workspace"/--> | |
67 </inputs> | |
68 | |
69 <outputs> | |
70 <!-- The summary dataset is always created. Hiding or dropping it would be | |
71 preferable, but variable outputs need a primary dataset, and the | |
72 history refresh does not happen when that dataset is hidden. | |
73 --> | |
74 <data name="summary" format="dsf" label="Dataset summary file of ${datasetName}"/> | |
75 <data name="zipfigures" format="zip" label="figures of normalization of ${datasetName}"> | |
76 <filter>outputgraph == "TRUE"</filter> | |
77 </data> | |
78 <data name="log" format="log" label="log of normalization ${datasetName}"> | |
79 <filter>outputlog == "TRUE"</filter> | |
80 </data> | |
81 </outputs> | |
82 | |
83 <stdio> <!-- Any exit code of 1 or above is reported as a fatal error. --> | |
84 <exit_code level="fatal" range="1:" description="See logs for more details"/> | |
85 </stdio> | |
86 | |
87 <help> | |
88 | |
89 **What it does** | |
90 | |
91 This preprocessing step consists of correcting the biological and technical biases introduced by the experiment. Raw data from Affymetrix arrays are provided in different CEL files. These data must be normalized before statistical analysis. | |
92 The pre-processing is proposed as a wrapper of aroma.* packages (using CRMAv2 and TumorBoost when appropriate). Note that this implies that the pre-processing step is only available for Affymetrix arrays. | |
93 | |
94 ----- | |
95 | |
96 **Chip file naming conventions** | |
97 | |
98 Chip filenames must strictly follow the following rules : | |
99 | |
100 - *.cdf* filename must comply with the following format : < chiptype >,< tag >.cdf (e.g., for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf). Note the use of a comma (not a period) between < chiptype > and the tag "Full". | |
101 | |
102 - *.ufl* filename must start with < chiptype >,< tag > (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl). | |
103 | |
104 - *.ugp* filename must start with < chiptype >,< tag > (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ugp). | |
105 | |
106 - *.acs* file name must start with < chiptype >,< tag > (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,HB20080710.acs). | |
107 | |
108 ----- | |
109 | |
110 **Normal-tumor study with TumorBoost** | |
111 | |
112 In cases where normal (control) samples are matched to tumor samples, normalization can be improved using TumorBoost. In this case, a normal-tumor csv file must be provided : | |
113 | |
114 - The first column contains the names of the files corresponding to normal samples of the dataset. | |
115 | |
116 - The second column contains the names of the tumor samples files. | |
117 | |
118 - Column names of these two columns are respectively normal and tumor. | |
119 | |
120 - Columns are separated by a comma. | |
121 | |
122 - *Extensions of the files (.CEL for example) should be removed* | |
123 | |
124 | |
125 | |
126 **Example** | |
127 | |
128 Suppose the dataset studied contains 6 .cel files (3 patients, each of them represented by a pair of normal and tumor .cel files) :: | |
129 | |
130 patient1_normal.cel | |
131 patient1_tumor.cel | |
132 patient2_normal.cel | |
133 patient2_tumor.cel | |
134 patient3_normal.cel | |
135 patient3_tumor.cel | |
136 | |
137 | |
138 The csv file should look like this :: | |
139 | |
140 normal,tumor | |
141 patient1_normal,patient1_tumor | |
142 patient2_normal,patient2_tumor | |
143 patient3_normal,patient3_tumor | |
144 | |
145 | |
146 ----- | |
147 | |
148 **Citation** | |
149 | |
150 When using this tool, please cite : | |
151 | |
152 `Q. Grimonprez, A. Celisse, M. Cheok, M. Figeac, and G. Marot. MPAgenomics : An R package for multi-patients analysis of genomic markers, 2014. Preprint <http://fr.arxiv.org/abs/1401.5035>`_ | |
153 | |
154 As CRMAv2 normalization is used, please also cite `H. Bengtsson, P. Wirapati, and T. P. Speed. A single-array preprocessing method for estimating full-resolution raw copy numbers from all Affymetrix genotyping arrays including GenomeWideSNP 5 & 6. Bioinformatics, 25(17):2149–2156, 2009. <http://bioinformatics.oxfordjournals.org/content/25/17/2149.short>`_ | |
155 | |
156 When using TumorBoost to improve normalization in a normal-tumor study, please cite `H. Bengtsson, P. Neuvial, and T. P. Speed. TumorBoost: Normalization of allele-specific tumor copy numbers from a single pair of tumor-normal genotyping microarrays. BMC Bioinformatics, 11, 2010 <http://www.biomedcentral.com/1471-2105/11/245>`_ | |
157 | |
158 </help> | |
159 </tool> |