comparison normalize_select_features_scale.xml @ 0:c3170652bd98 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat_v5 commit a9214c07b0cc929a51fd92a369bb89c675b6c88d
author iuc
date Wed, 11 Sep 2024 10:20:38 +0000
parents
children 6bccf5f85f92
comparison
equal deleted inserted replaced
-1:000000000000 0:c3170652bd98
1 <tool id="seurat_preprocessing" name="Seurat Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>- Normalize, Find Variable Features, Scale and Regress</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_command"/>
8 <command detect_errors="exit_code"><![CDATA[
9 @CMD@
10 ]]></command>
11 <configfiles>
12 <configfile name="script_file"><![CDATA[
13 @CMD_imports@
14 @CMD_read_inputs@
15
16 #if $method.method == 'NormalizeData'
17 seurat_obj<-NormalizeData( ## normalise the selected assay; result overwrites seurat_obj
18 seurat_obj,
19 #if $method.assay != ''
20 assay = '$method.assay',
21 #end if
22 normalization.method = '$method.normalization_method.normalization_method',
23 #if $method.normalization_method.normalization_method == 'CLR'
24 margin = $method.normalization_method.margin,
25 #end if
26 #if $method.block_size
27 block.size = $method.block_size,
28 #end if
29 scale.factor = $method.scale_factor ## unconditional argument kept last so the call never ends in a dangling comma when block_size is unset
30 )
31
32 #else if $method.method == 'FindVariableFeatures'
33 seurat_obj<-FindVariableFeatures( ## select variable features; result overwrites seurat_obj
34 seurat_obj,
35 #if $method.assay != ''
36 assay = '$method.assay',
37 #end if
38 selection.method = '$method.selection_method.selection_method',
39 #if $method.selection_method.selection_method == 'vst'
40 loess.span = $method.selection_method.loess_span,
41 #if $method.selection_method.clip_max
42 clip.max = $method.selection_method.clip_max, ## only emitted when a value was entered
43 #end if
44 #if $method.selection_method.nfeatures
45 nfeatures = $method.selection_method.nfeatures, ## nfeatures is only offered for the vst and dispersion methods
46 #end if
47 #else if $method.selection_method.selection_method == 'dispersion'
48 #if $method.selection_method.nfeatures
49 nfeatures = $method.selection_method.nfeatures,
50 #end if
51 #end if
52 num.bin = $method.num_bin, ## num.bin and binning.method are passed for every selection method
53 binning.method = '$method.binning_method'
54 )
55
56 #if $method.output_topN.output_topN == 'true'
57 N = $method.output_topN.topN
58 top_N<-head(VariableFeatures(seurat_obj), N)
59 @CMD_write_variable_tab@
60 #end if
61
62 #else if $method.method == 'ScaleData'
63
64 #if $method.scale_features.scale_features == 'list_genes'
65 features_list<-paste(readLines('$method.scale_features.features_to_scale'), collapse=",")
66 #else if $method.scale_features.scale_features == 'all_genes'
67 all.genes<-rownames(seurat_obj)
68 #end if
69
70 seurat_obj<-ScaleData( ## scale/center (and optionally regress) the chosen features; result overwrites seurat_obj
71 seurat_obj,
72 #if $method.scale_features.scale_features == 'all_genes'
73 features = all.genes,
74 #else if $method.scale_features.scale_features == 'list_genes'
75 features = c(unlist(strsplit(features_list, ","))),
76 #end if
77 #if $method.assay != ''
78 assay = '$method.assay',
79 #end if
80 #if $method.regress.regress == 'true'
81 vars.to.regress = c(unlist(strsplit(gsub(" ", "", '$method.regress.vars_to_regress'), ","))),
82 model.use = '$method.regress.model_use',
83 use.umi = $method.regress.use_umi,
84 #end if
85 #if $method.split_by != ''
86 split.by = '$method.split_by',
87 #end if
88 do.scale = $method.do_scale,
89 #if str($method.do_scale) == 'TRUE'
90 scale.max = $method.scale_max, ## scaling-only options; do_scale renders as TRUE/FALSE (its truevalue/falsevalue), so it is compared against 'TRUE'
91 block.size = $method.block_size,
92 min.cells.to.block = $method.min_cells_to_block,
93 #end if
94 do.center = $method.do_center ## unconditional argument kept last so the call never ends in a dangling comma
95 )
96
97 #else if $method.method == 'SCTransform'
98
99 #if $method.residual_features.residual_features_options == 'selected_features'
100 features_list<-paste(readLines('$method.residual_features.residual_features'), collapse=",")
101 #end if
102
103 seurat_obj<-SCTransform( ## sctransform-based normalisation into a new assay; result overwrites seurat_obj
104 seurat_obj,
105 #if $method.assay != ''
106 assay = '$method.assay',
107 #end if
108 new.assay.name = '$method.new_assay_name',
109 #if $method.residual_features.residual_features_options == 'NULL'
110 #if $method.residual_features.variable_features.variable_features == 'set_number'
111 variable.features.n = $method.residual_features.variable_features.variable_features_n,
112 #else if $method.residual_features.variable_features.variable_features == 'use_cutoff'
113 variable.features.n = NULL, variable.features.rv.th = $method.residual_features.variable_features.variable_features_rv_th, ## Seurat only applies the residual-variance cutoff when variable.features.n is NULL
114 #end if
115 #else if $method.residual_features.residual_features_options == 'selected_features'
116 residual.features = c(unlist(strsplit(features_list, ","))),
117 #end if
118 #if $method.vars_to_regress != ''
119 vars.to.regress = c(unlist(strsplit(gsub(" ", "", '$method.vars_to_regress'), ","))),
120 #end if
121 do.scale = $method.do_scale,
122 do.center = $method.do_center,
123 #if str($method.min_clip_range) != '' and str($method.max_clip_range) != ''
124 clip.range = c($method.min_clip_range, $method.max_clip_range), ## emitted only when both bounds are filled in; blank is tested as a string so a bound of 0 is not mistaken for empty
125 #end if
126 do.correct.umi = $method.adv.do_correct_umi,
127 ncells = $method.adv.ncells,
128 seed.use = $method.adv.seed_use,
129 vst.flavor = '$method.adv.vst_flavor',
130 #if $method.adv.conserve_memory.conserve_memory == 'FALSE'
131 return.only.var.genes = $method.adv.conserve_memory.return_only_var_genes,
132 #end if
133 conserve.memory = $method.adv.conserve_memory.conserve_memory ## unconditional argument kept last so the call never ends in a dangling comma when conserve_memory is TRUE
134 )
135
136 #if $method.output_topN.output_topN == 'true'
137 N = $method.output_topN.topN
138 top_N<-head(VariableFeatures(seurat_obj), N)
139 @CMD_write_variable_tab@
140 #end if
141
142 #end if
143
144 @CMD_rds_write_outputs@
145
146 ]]></configfile>
147 </configfiles>
148 <inputs>
149 <expand macro="input_rds"/>
150 <conditional name="method">
151 <param name="method" type="select" label="Method used">
152 <option value="NormalizeData">Normalize with 'NormalizeData'</option>
153 <option value="FindVariableFeatures">Identify highly variable genes with 'FindVariableFeatures'</option>
154 <option value="ScaleData">Scale and regress with 'ScaleData'</option>
155 <option value="SCTransform">Complete all preprocessing with 'SCTransform'</option>
156 </param>
157 <when value="NormalizeData">
158 <expand macro="select_assay"/>
159 <expand macro="normalize"/>
160 </when>
161 <when value="FindVariableFeatures">
162 <expand macro="select_assay"/>
163 <conditional name="selection_method">
164 <param name="selection_method" type="select" label="Method to select variable features" help="(selection.method)">
165 <option value="vst" selected="true">vst</option>
166 <option value="mean.var.plot">mean.var.plot</option>
167 <option value="dispersion">dispersion</option>
168 </param>
169 <when value="vst">
170 <param name="loess_span" type="float" value="0.3" label="Loess span parameter for fitting variance-mean relationship" help="(loess.span)"/>
171 <param name="clip_max" type="float" optional="true" value="" label="Maximum value after standardisation" help="leave blank to use default, the square root of number of cells (clip.max)"/>
172 <param argument="nfeatures" type="integer" optional="true" value="2000" label="Number of features to select as top variable features"/>
173 </when>
174 <when value="mean.var.plot"></when>
175 <when value="dispersion">
176 <param argument="nfeatures" type="integer" optional="true" value="2000" label="Number of features to select as top variable features"/>
177 </when>
178 </conditional>
179 <param name="num_bin" type="integer" value="20" label="Number of bins to use" help="(num.bin)"/>
180 <param name="binning_method" type="select" label="Method to compute bins" help="(binning.method)">
181 <option value="equal_width" selected="true">equal width</option>
182 <option value="equal_frequency">equal frequency</option>
183 </param>
184 <conditional name="output_topN">
185 <param name="output_topN" type="select" label="Output list of most variable features">
186 <option value="true">Yes</option>
187 <option value="false" selected="true">No</option>
188 </param>
189 <when value="true">
190 <expand macro="set_topN"/>
191 </when>
192 <when value="false">
193 </when>
194 </conditional>
195 </when>
196 <when value="ScaleData">
197 <expand macro="select_assay"/>
198 <conditional name="regress">
199 <param name="regress" type="select" label="Regress out a variable">
200 <option value="true">Yes</option>
201 <option value="false" selected="true">No</option>
202 </param>
203 <when value="true">
204 <param name="vars_to_regress" type="text" optional="true" value="" label="Variable(s) to regress out" help="comma-separated list e.g. percent.mt, nCount_RNA (vars.to.regress)">
205 <expand macro="valid_list"/>
206 </param>
207 <param name="model_use" type="select" optional="true" label="Model to use for regression" help="(model.use)">
208 <option value="linear" selected="true">linear</option>
209 <option value="poisson">poisson</option>
210 <option value="negbinom">negbinom</option>
211 </param>
212 <param name="use_umi" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Regress on UMI count data" help="recommended if regression model is not linear (use.umi)"/>
213 </when>
214 <when value="false">
215 </when>
216 </conditional>
217 <conditional name="scale_features">
218 <param name="scale_features" type="select" label="Features to scale">
219 <option value="variable_features" selected="true">Variable Features</option>
220 <option value="all_genes">All Features</option>
221 <option value="list_genes">Enter a list of features</option>
222 </param>
223 <when value="variable_features">
224 </when>
225 <when value="all_genes">
226 </when>
227 <when value="list_genes">
228 <param name="features_to_scale" type="data" format="txt,tabular" label="List of features to scale" help="text file with one feature on each line"/>
229 </when>
230 </conditional>
231 <param name="split_by" type="text" optional="true" value="" label="Scale cells in groups by a variable, vector or factor" help="(split.by)"/>
232 <param name="do_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Scale the data" help="(do.scale)"/>
233 <param name="do_center" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Center the data" help="(do.center)"/>
234 <param name="scale_max" type="float" value="10" label="Set max value for scaled data" help="(scale.max)"/>
235 <param name="block_size" type="integer" value="1000" label="Number of features to scale in a single computation" help="(block.size)"/>
236 <param name="min_cells_to_block" type="integer" value="3000" label="Use blocks for scaling if object has more than this number of cells" help="(min.cells.to.block)"/>
237 </when>
238 <when value="SCTransform">
239 <expand macro="select_assay_RNA"/>
240 <param name="new_assay_name" type="text" value="SCT" label="Name for new assay" help="to contain the normalized data (new.assay.name)">
241 <expand macro="valid_name"/>
242 </param>
243 <conditional name="residual_features">
244 <param name="residual_features_options" type="select" label="Genes to calculate residual features for" help="(residual.features)">
245 <option value="NULL" selected="true">all genes</option>
246 <option value="selected_features">selected features</option>
247 </param>
248 <when value="NULL">
249 <conditional name="variable_features">
250 <param name="variable_features" type="select" label="How to set variable features">
251 <option value="set_number" selected="true">set number of variable features</option>
252 <option value="use_cutoff">set cutoff for residual variance</option>
253 </param>
254 <when value="set_number">
255 <param name="variable_features_n" type="integer" value="3000" label="Use this many features as variable features" help="after ranking residual variance for all genes (variable.features.n)"/>
256 </when>
257 <when value="use_cutoff">
258 <param name="variable_features_rv_th" type="float" value="1.3" label="Use this residual variance cutoff" help="after calculating residual variance for all genes (variable.features.rv.th)"/>
259 </when>
260 </conditional>
261 </when>
262 <when value="selected_features">
263 <param name="residual_features" type="data" format="txt,tabular" label="List of genes to use" help="text file with one feature on each line. These genes will be set as VariableFeatures in returned object (residual.features)"/>
264 </when>
265 </conditional>
266 <conditional name="output_topN">
267 <param name="output_topN" type="select" label="Output list of most variable features">
268 <option value="true">Yes</option>
269 <option value="false" selected="true">No</option>
270 </param>
271 <when value="true">
272 <expand macro="set_topN"/>
273 </when>
274 <when value="false">
275 </when>
276 </conditional>
277 <param name="vars_to_regress" type="text" optional="true" value="" label="Variable(s) to regress out" help="comma-separated list e.g. percent.mt, nCount_RNA (vars.to.regress)">
278 <expand macro="valid_list"/>
279 </param>
280 <param name="do_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Scale the residuals" help="(do.scale)"/>
281 <param name="do_center" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Center the residuals" help="(do.center)"/>
282 <param name="min_clip_range" type="float" optional="true" value="" label="Minimum for residual variances" help="clip values below this or leave empty to use default of -sqrt(n/30) (clip.range)"/>
283 <param name="max_clip_range" type="float" optional="true" value="" label="Maximum for residual variances" help="clip values above this or leave empty to use default of sqrt(n/30) (clip.range)"/>
284 <section name="adv" title="Advanced Options"> <!-- SCTransform tuning options; every parameter here has a default -->
285 <param name="do_correct_umi" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Place corrected UMI matrix in assay counts slot" help="(do.correct.umi)"/>
286 <param argument="ncells" type="integer" value="5000" label="Number of subsampling cells used to build NB regression"/>
287 <param name="seed_use" type="integer" value="1448145" label="Set random seed" help="(seed.use)"/>
288 <param name="vst_flavor" type="select" label="Vst Flavor" help="version of sctransform to use (vst.flavor)">
289 <option value="v1">v1</option>
290 <option value="v2" selected="true">v2</option>
291 </param>
292 <conditional name="conserve_memory">
293 <param name="conserve_memory" type="select" label="Conserve memory" help="by not creating residual matrix for all genes (conserve.memory)">
294 <option value="FALSE" selected="true">No</option>
295 <option value="TRUE">Yes</option>
296 </param>
297 <when value="FALSE">
298 <param name="return_only_var_genes" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Only return variable genes in scale.data matrices" help="(return.only.var.genes)"/>
299 </when>
300 <when value="TRUE"></when>
301 </conditional>
302 </section>
303 </when>
304 </conditional>
305 <expand macro="inputs_common_advanced"/>
306 </inputs>
307 <outputs>
308 <expand macro="seurat_outputs"/>
309 <expand macro="variable_out"/>
310 </outputs>
311 <tests>
312 <test expect_num_outputs="2">
313 <!-- test1: NormalizeData -->
314 <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/filtered.rds"/>
315 <conditional name="method">
316 <param name="method" value="NormalizeData"/>
317 <conditional name="normalization_method">
318 <param name="normalization_method" value="LogNormalize"/>
319 </conditional>
320 <param name="scale_factor" value="10000"/>
321 </conditional>
322 <section name="advanced_common">
323 <param name="show_log" value="true"/>
324 </section>
325 <output name="hidden_output">
326 <assert_contents>
327 <has_text_matching expression="NormalizeData"/>
328 </assert_contents>
329 </output>
330 <output name="rds_out" location="https://zenodo.org/records/13732784/files/normalized.rds" ftype="rds" compare="sim_size"/>
331 </test>
332 <test expect_num_outputs="3">
333 <!-- test2: FindVariableFeatures -->
334 <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/normalized.rds"/>
335 <conditional name="method">
336 <param name="method" value="FindVariableFeatures"/>
337 <conditional name="selection_method">
338 <param name="selection_method" value="vst"/>
339 <param name="nfeatures" value="100"/>
340 </conditional>
341 <param name="num_bin" value="20"/>
342 <param name="binning_method" value="equal_width"/>
343 <conditional name="output_topN">
344 <param name="output_topN" value="true"/>
345 </conditional>
346 </conditional>
347 <section name="advanced_common">
348 <param name="show_log" value="true"/>
349 </section>
350 <output name="hidden_output">
351 <assert_contents>
352 <has_text_matching expression="FindVariableFeatures"/>
353 </assert_contents>
354 </output>
355 <output name="rds_out" location="https://zenodo.org/records/13732784/files/variablefeatures.rds" ftype="rds" compare="sim_size"/>
356 <output name="variable_tabular" location="https://zenodo.org/records/13732784/files/variable_top10.txt" ftype="txt">
357 <assert_contents>
358 <has_n_lines n="10"/>
359 </assert_contents>
360 </output>
361 </test>
362 <test expect_num_outputs="2">
363 <!-- test3: ScaleData -->
364 <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/variablefeatures.rds"/>
365 <conditional name="method">
366 <param name="method" value="ScaleData"/>
367 <conditional name="scale_features">
368 <param name="scale_features" value="all_genes"/>
369 </conditional>
370 </conditional>
371 <section name="advanced_common">
372 <param name="show_log" value="true"/>
373 </section>
374 <output name="hidden_output">
375 <assert_contents>
376 <has_text_matching expression="ScaleData"/>
377 </assert_contents>
378 </output>
379 <output name="rds_out" location="https://zenodo.org/records/13732784/files/scaled.rds" ftype="rds" compare="sim_size"/>
380 </test>
381 <test expect_num_outputs="2">
382 <!-- test4: SCTransform -->
383 <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/filtered.rds"/>
384 <conditional name="method">
385 <param name="method" value="SCTransform"/>
386 <param name="vars_to_regress" value="percent.mt"/>
387 <conditional name="residual_features">
388 <param name="residual_features_options" value="selected_features"/>
389 <param name="residual_features" location="https://zenodo.org/records/13741333/files/residual_features.txt"/>
390 </conditional>
391 </conditional>
392 <section name="advanced_common">
393 <param name="show_log" value="true"/>
394 </section>
395 <output name="hidden_output">
396 <assert_contents>
397 <has_text_matching expression="SCTransform"/>
398 </assert_contents>
399 </output>
400 <output name="rds_out" location="https://zenodo.org/records/13732784/files/SCTransformed.rds" ftype="rds" compare="sim_size"/>
401 </test>
402 </tests>
403 <help><![CDATA[
404 Seurat
405 ======
406
407 Seurat is an R package designed for QC, analysis, and exploration of single-cell RNA-seq data.
408
409 Seurat aims to enable users to identify and interpret sources of heterogeneity from single-cell transcriptomic measurements, and to integrate diverse types of single-cell data.
410
411 NormalizeData
412 =============
413
414 Normalize the count data present in a given assay.
415
416 Methods:
417
418 “LogNormalize”: Feature counts for each cell are divided by the total counts for that cell and multiplied by the scale.factor. This is then natural-log transformed using log1p
419
420 “CLR”: Applies a centered log ratio transformation
421
422 “RC”: Relative counts. Feature counts for each cell are divided by the total counts for that cell and multiplied by the scale.factor. No log-transformation is applied. For counts per million (CPM) set scale.factor = 1e6
423
424
425 More details on the `seurat documentation
426 <https://satijalab.org/seurat/reference/normalizedata>`__
427
428 FindVariableFeatures
429 ====================
430
431 Identify features that are outliers on a 'mean variability plot'.
432
433 Methods:
434
435 “vst”: First, fits a line to the relationship of log(variance) and log(mean) using local polynomial regression (loess). Then standardizes the feature values using the observed mean and expected variance (given by the fitted line). Feature variance is then calculated on the standardized values after clipping to a maximum (see clip.max parameter).
436
437 “mean.var.plot” (mvp): First, uses a function to calculate average expression (mean.function, using FastExpMean) and dispersion (dispersion.function, using FastLogVMR) for each feature. Next, divides features into num.bin (default 20) bins based on their average expression, and calculates z-scores for dispersion within each bin. The purpose of this is to identify variable features while controlling for the strong relationship between variability and average expression
438
439 “dispersion” (disp): selects the genes with the highest dispersion values
440
441 More details on the `seurat documentation
442 <https://satijalab.org/seurat/reference/findvariablefeatures>`__
443
444 Scale and regress the data with ScaleData
445 =========================================
446
447 Scale and center features in the dataset.
448
449 If variables are provided in vars.to.regress, they are individually regressed against each feature, and the resulting residuals are then scaled and centered.
450
451 More details on the `seurat documentation
452 <https://satijalab.org/seurat/reference/scaledata>`__
453
454 SCTransform
455 ===========
456
457 Use this function as an alternative to the NormalizeData, FindVariableFeatures, ScaleData workflow.
458
459 Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, data being log1p(counts), scale.data being pearson residuals; sctransform::vst intermediate results are saved in misc slot of new assay.
460
461 More details on the `seurat documentation
462 <https://satijalab.org/seurat/reference/sctransform>`__
463
464 ]]></help>
465 <expand macro="citations"/>
466 </tool>