comparison create_seurat.xml @ 0:d0c26c9430f2 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat_v5 commit a9214c07b0cc929a51fd92a369bb89c675b6c88d
author iuc
date Wed, 11 Sep 2024 10:21:11 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d0c26c9430f2
1 <tool id="seurat_create" name="Seurat Create" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>- Prepare data for the pipeline</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_command"/>
8 <command detect_errors="exit_code"><![CDATA[
## For CreateSeuratObject: link the chosen inputs to fixed names in the working directory,
## then copy the generated R script into the log output and run it, appending its stdout.
## Every step is chained with && so a failed symlink stops the job before Rscript runs.
## All other methods delegate to the shared CMD macro token.
9 #if str($method.method) == 'CreateSeuratObject':
10 #if str($method.input_type.input_type) == 'mtx':
11 ln -s '$method.input_type.matrix' matrix.mtx &&
12 ln -s '$method.input_type.gene_names' genes.tsv &&
13 ln -s '$method.input_type.cell_barcodes' barcodes.tsv &&
14 #else if str($method.input_type.input_type) == 'tab':
15 ln -s '$method.input_type.input_tab' input.tab &&
16 #if str($method.input_type.citeseq.citeseq) == 'true':
17 ln -s '$method.input_type.citeseq.citeseq_table' citeseq.tab &&
18 #end if
19 #end if
20 cat '$script_file' > '$hidden_output' &&
21 Rscript '$script_file' >> '$hidden_output'
22 #else:
23 @CMD@
24 #end if
25 ]]></command>
26 <configfiles>
27 <configfile name="script_file"><![CDATA[
28 @CMD_imports@
29
30 #if $method.method == 'CreateSeuratObject'
## Branch 1: build a new Seurat object. Counts are loaded from the files linked by the
## command section (10x-style files in '.', or input.tab), optional metadata is attached,
## and optional percent.mt / ADT additions are applied afterwards.
31 #if $method.input_type.input_type == 'mtx'
32 counts<-Read10X(
33 data.dir = '.',
34 gene.column = $method.input_type.gene_column,
35 cell.column = $method.input_type.cell_column,
36 unique.features = $method.input_type.unique_features,
37 strip.suffix = $method.input_type.strip_suffix
38 )
39 ## Optional per-cell metadata: values are trimmed, and empty or NA entries become "N/A".
40 #if $method.meta_data
41 meta_data<-read.table(
42 "$method.meta_data",
43 header = TRUE,
44 row.names = 1,
45 sep = "\t"
46 )
47 for (name in colnames(meta_data)) {
48 meta_data[[name]]<-gsub("^$", "N/A", trimws(meta_data[[name]]))
49 meta_data[[name]][is.na(meta_data[[name]])]<-"N/A"
50 }
51 #end if
52 ## NOTE(review): counts is passed whole; if Read10X returns a list for multi-modal input, confirm whether counts[['Gene Expression']] is intended here.
53 seurat_obj<-CreateSeuratObject(
54 counts,
55 assay = '$method.assay',
56 #if $method.names_field != ''
57 names.field = $method.names_field,
58 #end if
59 #if $method.names_delim != ''
60 names.delim = '$method.names_delim',
61 #end if
62 #if $method.min_cells
63 min.cells = $method.min_cells,
64 #end if
65 #if $method.min_features
66 min.features = $method.min_features,
67 #end if
68 #if $method.meta_data
69 meta.data = meta_data
70 #end if
71 )
72
73 #if $method.percent_mt.percent_mt == 'true'
74 seurat_obj[["percent.mt"]]<-PercentageFeatureSet(
75 seurat_obj,
76 pattern = '$method.percent_mt.pattern'
77 )
78 #end if
79 ## ADT assay: subset the antibody matrix to the cells kept in seurat_obj. The cell names must index the matrix columns, not be passed as a second argument.
80 #if $method.input_type.citeseq_boolean == 'true'
81 seurat_obj[['ADT']]<-CreateAssayObject(counts = counts[['Antibody Capture']][, colnames(x = seurat_obj)])
82 #end if
83
84 #else if $method.input_type.input_type == 'tab'
85 counts<-read.table(
86 'input.tab',
87 header = TRUE,
88 row.names = 1,
89 sep = "\t"
90 )
91 ## Optional per-cell metadata, normalised the same way as in the mtx branch.
92 #if $method.meta_data
93 meta_data<-read.table(
94 "$method.meta_data",
95 header = TRUE,
96 row.names = 1,
97 sep = "\t"
98 )
99 for (name in colnames(meta_data)) {
100 meta_data[[name]]<-gsub("^$", "N/A", trimws(meta_data[[name]]))
101 meta_data[[name]][is.na(meta_data[[name]])]<-"N/A"
102 }
103 #end if
104
105 seurat_obj<-CreateSeuratObject(
106 counts,
107 assay = '$method.assay',
108 #if $method.names_field != ''
109 names.field = $method.names_field,
110 #end if
111 #if $method.names_delim != ''
112 names.delim = '$method.names_delim',
113 #end if
114 #if $method.min_cells
115 min.cells = $method.min_cells,
116 #end if
117 #if $method.min_features
118 min.features = $method.min_features,
119 #end if
120 #if $method.meta_data
121 meta.data = meta_data
122 #end if
123 )
124
125 #if $method.percent_mt.percent_mt == 'true'
126 seurat_obj[["percent.mt"]]<-PercentageFeatureSet(
127 seurat_obj,
128 pattern = '$method.percent_mt.pattern'
129 )
130 #end if
131 ## Optional CITE-Seq table (linked as citeseq.tab by the command section) is added as an ADT assay.
132 #if $method.input_type.citeseq.citeseq == 'true'
133 citeseq<-read.table(
134 'citeseq.tab',
135 header = TRUE,
136 row.names = 1,
137 sep = "\t"
138 )
139
140 seurat_obj[['ADT']]<-CreateAssay5Object(counts = citeseq)
141 #end if
142 #end if
143
144 #else if $method.method == 'Add_QC_Metrics'
145 @CMD_read_inputs@
146 ## Feature list: one name per line (comma-separated entries are still accepted); names are trimmed here and blank entries are dropped below.
147 #if $method.match.match == 'list'
148 features_list<-trimws(unlist(strsplit(readLines('$method.match.features'), ",")))
149 #end if
150 ## The optional assay argument is emitted first so the call never ends with a dangling comma.
151 seurat_obj[['$method.col_name']]<-PercentageFeatureSet(
152 seurat_obj,
153 #if $method.assay != ''
154 assay = '$method.assay',
155 #end if
156 #if $method.match.match == 'pattern'
157 pattern = '$method.match.pattern'
158 #else if $method.match.match == 'list'
159 features = features_list[nzchar(features_list)]
160 #end if
161 )
162
163 #else if $method.method == 'FilterCells'
164 @CMD_read_inputs@
## Each threshold is applied only when the user supplied a value. The test is on the
## string form of the parameter so that an explicit 0 is honoured rather than being
## treated as "not set". All bounds are strict (> minimum, < maximum).
165 #if str($method.minimum_nFeature_RNA) != ''
166 seurat_obj<-subset(
167 seurat_obj,
168 subset = nFeature_RNA > $method.minimum_nFeature_RNA
169 )
170 #end if
171 #if str($method.maximum_nFeature_RNA) != ''
172 seurat_obj<-subset(
173 seurat_obj,
174 subset = nFeature_RNA < $method.maximum_nFeature_RNA
175 )
176 #end if
177 #if str($method.minimum_nCount_RNA) != ''
178 seurat_obj<-subset(
179 seurat_obj,
180 subset = nCount_RNA > $method.minimum_nCount_RNA
181 )
182 #end if
183 #if str($method.maximum_nCount_RNA) != ''
184 seurat_obj<-subset(
185 seurat_obj,
186 subset = nCount_RNA < $method.maximum_nCount_RNA
187 )
188 #end if
189 #if str($method.minimum_percent_mt) != ''
190 seurat_obj<-subset(
191 seurat_obj,
192 subset = percent.mt > $method.minimum_percent_mt
193 )
194 #end if
195 #if str($method.maximum_percent_mt) != ''
196 seurat_obj<-subset(
197 seurat_obj,
198 subset = percent.mt < $method.maximum_percent_mt
199 )
200 #end if
201 #if $method.other.other == 'true'
## NOTE(review): other_variable is interpolated unquoted as an R symbol; it must name an existing cell metric.
202 #if str($method.other.minimum) != ''
203 seurat_obj<-subset(
204 seurat_obj,
205 subset = $method.other.other_variable > $method.other.minimum
206 )
207 #end if
208 #if str($method.other.maximum) != ''
209 seurat_obj<-subset(
210 seurat_obj,
211 subset = $method.other.other_variable < $method.other.maximum
212 )
213 #end if
214 #end if
215
216 #end if
217
218 @CMD_rds_write_outputs@
219
220 ]]></configfile>
221 </configfiles>
222 <inputs>
223 <conditional name="method">
224 <param name="method" type="select" label="Method used">
225 <option value="CreateSeuratObject">Create Seurat Object</option>
226 <option value="Add_QC_Metrics">Add QC Metrics</option>
227 <option value="FilterCells">Filter cells by QC metrics</option>
228 </param>
229 <when value="CreateSeuratObject">
<!-- Parameters for building a new object. Counts come from either a matrix-market
     triplet (mtx) or a single tab-delimited table (tab). -->
230 <conditional name="input_type">
231 <param name="input_type" type="select" label="Select format of input">
232 <option value="mtx" selected="true">matrix market (for e.g. 10x data)</option>
233 <option value="tab">tab-delimited text</option>
234 </param>
235 <when value="mtx">
<!-- The three files are linked as matrix.mtx, genes.tsv and barcodes.tsv by the command section;
     the remaining parameters are passed to Read10X. -->
236 <param name="matrix" type="data" format="mtx" label="Counts matrix with features as rows, cells as columns (.mtx)"/>
237 <param name="citeseq_boolean" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Matrix includes citeseq data"/>
238 <param name="gene_names" type="data" format="tsv,tabular" label="List of gene names (for rows)"/>
239 <param name="cell_barcodes" type="data" format="tsv,tabular" label="List of cell barcodes (for columns)"/>
240 <param name="gene_column" type="integer" value="2" label="Column of gene table to use as gene names" help="(gene.column)"/>
241 <param name="cell_column" type="integer" value="1" label="Column of cell table to use as cell names" help="(cell.column)"/>
242 <param name="unique_features" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Make feature names unique" help="(unique.features)"/>
243 <param name="strip_suffix" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove trailing -1 if present in all cell barcodes" help="(strip.suffix)"/>
244 </when>
245 <when value="tab">
<!-- The table is linked as input.tab and read with read.table (header row, first column as row names). -->
246 <param name="input_tab" type="data" format="tsv,tabular" label="Expression table with features as rows, cells as columns" help="(file)"/>
247 <conditional name="citeseq">
248 <param name="citeseq" type="select" label="Add citeseq assay">
249 <option value="false" selected="true">No</option>
250 <option value="true">Yes</option>
251 </param>
252 <when value="false"></when>
253 <when value="true">
254 <param name="citeseq_table" type="data" format ="tsv,tabular" label="CITE-Seq data to add"/>
255 </when>
256 </conditional>
257 </when>
258 </conditional>
<!-- Options below are shared by both input formats and map onto CreateSeuratObject arguments. -->
259 <param name="meta_data" type="data" format="tsv,tabular" optional="true" label="Additional cell metadata to add" help="table with cell names in first column (meta.data)"/>
260 <param argument="assay" type="text" value="RNA" label="Name of assay to create">
261 <expand macro="valid_name"/>
262 </param>
263 <param name="min_cells" type="integer" optional="true" value="0" label="Include features detected in at least this many cells" help="(min.cells)"/>
264 <param name="min_features" type="integer" optional="true" value="0" label="Include cells where at least this many features are detected" help="(min.features)"/>
265 <param name="names_field" type="integer" optional="true" value="" label="Field of cell names to use as cell identity class" help="(names.field)"/>
266 <param name="names_delim" type="text" optional="true" value="" label="Delimiter for cell names" help="(names.delim)"/>
<!-- When enabled, a percent.mt column is computed with PercentageFeatureSet using the given pattern. -->
267 <conditional name="percent_mt">
268 <param name="percent_mt" type="select" label="Calculate percentage of mito genes in each cell">
269 <option value="false" selected="true">No</option>
270 <option value="true">Yes</option>
271 </param>
272 <when value="false">
273 </when>
274 <when value="true">
275 <param argument="pattern" type="text" value="^MT-" label="Pattern/regex to match in gene names" help="e.g. the default '^MT-' matches gene names starting with 'MT-' which are human mitochondrial genes"/>
276 </when>
277 </conditional>
278 </when>
279 <when value="Add_QC_Metrics">
<!-- Adds one metadata column (col_name) holding the PercentageFeatureSet result for the
     features selected either by a name pattern or by a list file. -->
280 <expand macro="input_rds"/>
281 <conditional name="match">
282 <param name="match" type="select" label="Calculate percentage of reads based on">
283 <option value="pattern" selected="true">Pattern in gene names</option>
284 <option value="list">List of features</option>
285 </param>
286 <when value="pattern">
287 <param argument="pattern" type="text" value="^MT-" label="Pattern/regex to match in gene names" help="e.g. the default '^MT-' matches gene names starting with 'MT-' which are human mitochondrial genes"/>
288 </when>
289 <when value="list">
290 <param argument="features" type="data" format="txt,tabular" label="List of genes to match" help="text file with one feature on each line"/>
291 </when>
292 </conditional>
<!-- col_name is used as the metadata key in the generated R script. -->
293 <param name="col_name" type="text" value="percent.mt" label="Name to store the variable as">
294 <expand macro="valid_name"/>
295 </param>
296 <expand macro="select_assay"/>
297 </when>
298 <when value="FilterCells">
<!-- Each threshold is optional; the generated script applies a strict comparison
     (> minimum, < maximum) for every value that is filled in. -->
299 <expand macro="input_rds"/>
300 <param name="minimum_nFeature_RNA" type="integer" optional="true" value="" label="Minimum nFeature_RNA"/>
301 <param name="maximum_nFeature_RNA" type="integer" optional="true" value="" label="Maximum nFeature_RNA"/>
302 <param name="minimum_nCount_RNA" type="integer" optional="true" value="" label="Minimum nCount_RNA"/>
303 <param name="maximum_nCount_RNA" type="integer" optional="true" value="" label="Maximum nCount_RNA"/>
304 <param name="minimum_percent_mt" type="float" optional="true" value="" label="Minimum percent.mt"/>
305 <param name="maximum_percent_mt" type="float" optional="true" value="" label="Maximum percent.mt"/>
306 <conditional name="other">
307 <param name="other" type="select" label="Filter by a different metric">
308 <option value="false" selected="true">No</option>
309 <option value="true">Yes</option>
310 </param>
311 <when value="false">
312 </when>
313 <when value="true">
<!-- other_variable is written unquoted into the R subset expression, so it gets the same
     valid_name restriction as the col_name parameter that creates such metrics. -->
314 <param name="other_variable" type="text" value="" label="Enter name of cell metric to filter" help="e.g. percent.ribo or other metrics calculated using 'Add QC Metrics'"><expand macro="valid_name"/></param>
315 <param name="minimum" type="float" optional="true" value="" label="Minimum"/>
316 <param name="maximum" type="float" optional="true" value="" label="Maximum"/>
317 </when>
318 </conditional>
319 </when>
320 </conditional>
321 <expand macro="inputs_common_advanced"/>
322 </inputs>
323 <outputs>
324 <expand macro="seurat_outputs"/>
325 </outputs>
326 <tests>
327 <test expect_num_outputs="2">
328 <!-- test1: CreateSeuratObject from a matrix-market triplet, taking gene names from column 1 and computing percent.mt with the '^Mt' pattern; checks the logged script and compares the RDS output -->
329 <conditional name="method">
330 <param name="method" value="CreateSeuratObject"/>
331 <conditional name="input_type">
332 <param name="input_type" value="mtx"/>
333 <param name="matrix" location="https://zenodo.org/records/13732784/files/matrix.mtx"/>
334 <param name="gene_names" location="https://zenodo.org/records/13732784/files/genes.tsv"/>
335 <param name="cell_barcodes" location="https://zenodo.org/records/13732784/files/barcodes.tsv"/>
336 <param name="gene_column" value="1"/>
337 </conditional>
338 <conditional name="percent_mt">
339 <param name="percent_mt" value="true"/>
340 <param name="pattern" value="^Mt"/>
341 </conditional>
342 </conditional>
343 <section name="advanced_common">
344 <param name="show_log" value="true"/>
345 </section>
346 <output name="hidden_output">
347 <assert_contents>
348 <has_text_matching expression="Read10X"/>
349 <has_text_matching expression="CreateSeuratObject"/>
350 </assert_contents>
351 </output>
352 <output name="rds_out" location="https://zenodo.org/records/13732784/files/rawdata.rds" ftype="rds"/>
353 </test>
354 <test expect_num_outputs="2">
355 <!-- test2: CreateSeuratObject from a tab-delimited counts table with default options; checks that the logged script uses read.table and CreateSeuratObject and compares the RDS output -->
356 <conditional name="method">
357 <param name="method" value="CreateSeuratObject"/>
358 <conditional name="input_type">
359 <param name="input_type" value="tab"/>
360 <param name="input_tab" location="https://zenodo.org/records/13732784/files/counts.tsv"/>
361 </conditional>
362 </conditional>
363 <section name="advanced_common">
364 <param name="show_log" value="true"/>
365 </section>
366 <output name="hidden_output">
367 <assert_contents>
368 <has_text_matching expression="read.table"/>
369 <has_text_matching expression="CreateSeuratObject"/>
370 </assert_contents>
371 </output>
372 <output name="rds_out" location="https://zenodo.org/records/13732784/files/rawdata2.rds" ftype="rds"/>
373 </test>
374 <test expect_num_outputs="2">
375 <!-- test3: CreateSeuratObject from a tab-delimited RNA table plus a CITE-Seq table, with min_features cleared; min_features is nested under the method conditional where it is declared -->
376 <conditional name="method">
377 <param name="method" value="CreateSeuratObject"/>
378 <conditional name="input_type">
379 <param name="input_type" value="tab"/>
380 <param name="input_tab" location="https://zenodo.org/records/13732784/files/rna.tab"/>
381 <conditional name="citeseq">
382 <param name="citeseq" value="true"/>
383 <param name="citeseq_table" location="https://zenodo.org/records/13732784/files/adt.tab"/>
384 </conditional>
385 </conditional>
386 <param name="min_features" value=""/>
387 </conditional>
388 <section name="advanced_common">
389 <param name="show_log" value="true"/>
390 </section>
391 <output name="hidden_output">
392 <assert_contents>
393 <has_text_matching expression="read.table"/>
394 <has_text_matching expression="CreateSeuratObject"/>
395 </assert_contents>
396 </output>
397 <output name="rds_out" location="https://zenodo.org/records/13732784/files/citeseq.rds" ftype="rds"/>
398 </test>
399 <test expect_num_outputs="2">
400 <!-- test4: Add_QC_Metrics with the '^Rp' pattern stored as percent.ribo. seurat_rds is nested under the method conditional because the input_rds macro is expanded inside its when-branch (assumes that macro declares seurat_rds) -->
401 <conditional name="method">
402 <param name="method" value="Add_QC_Metrics"/>
403 <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/rawdata.rds"/>
404 <conditional name="match">
405 <param name="match" value="pattern"/>
406 <param name="pattern" value="^Rp"/>
407 </conditional>
408 <param name="col_name" value="percent.ribo"/>
409 </conditional>
410 <section name="advanced_common">
411 <param name="show_log" value="true"/>
412 </section>
413 <output name="hidden_output">
414 <assert_contents>
415 <has_text_matching expression="PercentageFeatureSet"/>
416 <has_text_matching expression="percent.ribo"/>
417 </assert_contents>
418 </output>
419 <output name="rds_out" location="https://zenodo.org/records/13732784/files/ribodata.rds" ftype="rds"/>
420 </test>
421 <test expect_num_outputs="2">
422 <!-- test5: FilterCells on nCount_RNA bounds plus a custom metric (percent.mt below 2). seurat_rds is nested under the method conditional because the input_rds macro is expanded inside its when-branch (assumes that macro declares seurat_rds) -->
423 <conditional name="method">
424 <param name="method" value="FilterCells"/>
425 <param name="seurat_rds" location="https://zenodo.org/records/13732784/files/ribodata.rds"/>
426 <param name="minimum_nCount_RNA" value="1"/>
427 <param name="maximum_nCount_RNA" value="20000000"/>
428 <conditional name="other">
429 <param name="other" value="true"/>
430 <param name="other_variable" value="percent.mt"/>
431 <param name="maximum" value="2"/>
432 </conditional>
433 </conditional>
434 <section name="advanced_common">
435 <param name="show_log" value="true"/>
436 </section>
437 <output name="hidden_output">
438 <assert_contents>
439 <has_text_matching expression="subset"/>
440 </assert_contents>
441 </output>
442 <output name="rds_out" location="https://zenodo.org/records/13732784/files/filtered.rds" ftype="rds"/>
443 </test>
444 </tests>
445 <help><![CDATA[
446 Seurat
447 ======
448
449 Seurat is an R package designed for QC, analysis, and exploration of single-cell RNA-seq data.
450
451 Seurat aims to enable users to identify and interpret sources of heterogeneity from single-cell transcriptomic measurements, and to integrate diverse types of single-cell data.
452
453 Creating a Seurat Object
454 ========================
455
456 Seurat objects can be created from single cell data in matrix market or tab-delimited table formats, using the Read10X or read.table functions followed by CreateSeuratObject.
457 The input should be a single cell count matrix with features (genes) as rows and cells as columns.
458
459 Both RNA-seq and combined RNA and CITE-seq data can be used as inputs.
460
461 Read10X
462 ========
463
464 Load sparse data matrices provided by 10X genomics.
465
466 More details on the `seurat documentation
467 <https://satijalab.org/seurat/reference/read10x>`__
468
469 read.table
470 ==========
471
472 Read a tab-delimited (tsv or tabular) file into R as a table.
473
474 More details on the `R documentation
475 <https://www.rdocumentation.org/packages/utils/versions/3.6.2/topics/read.table>`__
476
477 CreateSeuratObject
478 ==================
479
480 Create a Seurat Object from raw count data. The resulting object is saved in RDS format.
481
482 names.field
483
484 For the initial identity class for each cell, choose this field from the cell's name.
485 E.g. If your cells are named as BARCODE_CLUSTER_CELLTYPE in the input matrix, set names.field to 3 to set the initial identities to CELLTYPE.
486
487 names.delim
488
489 For the initial identity class for each cell, choose this delimiter from the cell's column name.
490 E.g. If your cells are named as BARCODE-CLUSTER-CELLTYPE, set this to "-" to separate the cell name into its component parts for picking the relevant field.
491
492 meta.data
493
494 Additional cell-level metadata to add to the Seurat object. Should be a data.frame where the rows are cell names and the columns are additional metadata fields.
495 Row names in the metadata need to match the column names of the counts matrix.
496
497 Filtering can also be performed on:
498
499 min.cells = only include features/genes detected in at least this many cells
500
501 min.features = only include cells where at least this many features are detected
502
503 Some QC metrics are added when creating a Seurat Object (nCount_RNA and nFeature_RNA).
504 Mito percentage can optionally be calculated - by default it is based on gene names starting with "MT-", and the pattern can be changed to suit your gene names. To calculate other metrics, or to use a list of features instead of a pattern, use the separate 'Add QC Metrics' method (described under 'Calculate QC Metrics' below).
505
506 More details on the `seurat documentation
507 <https://satijalab.github.io/seurat-object/reference/CreateSeuratObject.html>`__
508
509 Calculate QC Metrics
510 ====================
511
512 Calculate the percentage of all the counts belonging to a subset of the possible features for each cell. This is useful when trying to compute the percentage of transcripts that map to mitochondrial genes for example.
513 The calculation here is simply the column sum of the matrix present in the counts slot for features belonging to the set divided by the column sum for all features times 100.
514
515 Feature sets can be defined by entering a list of genes or using a shared pattern in the gene names, such as "^MT-" or "^RP[LS]" for human mitochondrial or ribosomal genes.
516
517 More details on the `seurat documentation
518 <https://satijalab.org/seurat/reference/percentagefeatureset>`__
519
520 Filter Cells
521 ============
522
523 Filter cells based on QC metrics.
524
525 nFeature_RNA = number of unique genes identified in the cell
526
527 nCount_RNA = total number of RNA molecules (counts) found in the cell
528
529 percent.mt = percentage of the cell's counts that come from mitochondrial genes
530
531 More details on the `R documentation
532 <https://rdrr.io/r/base/subset.html>`__
533
534
535 ]]></help>
536 <expand macro="citations"/>
537 </tool>