Mercurial > repos > ebi-gxa > seurat_select_integration_features
changeset 0:699c0ca328f2 draft default tip
planemo upload commit 0264c359f1d638bbbbab515a3502231f679cdcf6
| author | ebi-gxa | 
|---|---|
| date | Sat, 02 Mar 2024 10:40:57 +0000 | 
| parents | |
| children | |
| files | extra/macro_mapper_seurat.yaml get_test_data.sh scripts/seurat-scale-data.R seurat_macros.xml seurat_select_integration_features.xml | 
| diffstat | 5 files changed, 773 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extra/macro_mapper_seurat.yaml Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,101 @@
+---
+# Maps groups of command-line options to the Galaxy macros that implement them.
+# Every macro/token name used below is defined in seurat_macros.xml.
+#   option_group              - command-line options covered by the entry
+#   pre_command_macros        - @TOKEN@ names whose text links/copies the inputs
+#   post_command_macros       - @TOKEN@ names whose text emits the options
+#   input_declaration_macros  - <xml> macros declaring the matching input params
+#   output_declaration_macros - <xml> macros declaring the matching outputs
+# Nested keys (multiple, varname, optional, format) have the same names as the
+# token_* attributes of those <xml> macros.
+# NOTE(review): the program that reads this file is not visible here; confirm
+# how it interprets these keys before renaming any of them.
+- option_group:
+    - input-object-file
+    - input-format
+  pre_command_macros:
+    - INPUT_OBJ_PREAMBLE
+  post_command_macros:
+    - INPUT_OBJECT
+  input_declaration_macros:
+    - input_object_params
+- option_group:
+    - output-object-file
+    - output-format
+  post_command_macros:
+    - OUTPUT_OBJECT
+  input_declaration_macros:
+    - output_object_params
+  output_declaration_macros:
+    - output_files
+- option_group:
+    - input-object-files
+    - input-format
+  pre_command_macros:
+    - INPUT_OBJS_PREAMBLE
+  post_command_macros:
+    - INPUT_OBJECTS
+  input_declaration_macros:
+    - input_object_params:
+        multiple: true
+- option_group:
+    - reference-object-files
+    - reference-format
+  pre_command_macros:
+    - REFERENCE_OBJS_PREAMBLE
+  post_command_macros:
+    - REFERENCE_OBJECTS
+  input_declaration_macros:
+    - input_object_params:
+        varname: reference
+        multiple: true
+        optional: true
+- option_group:
+    - reference-object-file
+    - reference-format
+  pre_command_macros:
+    - REFERENCE_OBJ_PREAMBLE
+  post_command_macros:
+    - REFERENCE_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: reference
+- option_group:
+    - anchors-object-file
+    - anchors-format
+  pre_command_macros:
+    - ANCHORS_OBJ_PREAMBLE
+  post_command_macros:
+    - ANCHORS_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: anchors
+- option_group:
+    - query-object-file
+    - query-format
+  pre_command_macros:
+    - QUERY_OBJ_PREAMBLE
+  post_command_macros:
+    - QUERY_OBJECT
+  input_declaration_macros:
+    - input_object_params:
+        varname: query
+- option_group:
+    - plot-out
+  post_command_macros:
+    - OUTPUT_PLOT
+  output_declaration_macros:
+    - plot_output_files_format:
+        format: png
+    - plot_output_files_format:
+        format: pdf
+    - plot_output_files_format:
+        format: eps
+    - plot_output_files_format:
+        format: jpg
+    - plot_output_files_format:
+        format: ps
+    - plot_output_files_format:
+        format: tiff
+    - plot_output_files_format:
+        format: svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+# Fetches the test datasets into ./test-data (the directory is created if missing).
+
+BASENAME_FILE='E-MTAB-6077-3k_features_90_cells'
+
+MTX_LINK='https://drive.google.com/uc?export=download&id=1-1ejn7scP80xsbrG0FtWzsozjg0hhc23'
+RDS_LINK='https://drive.google.com/uc?export=download&id=1KW_GX6xznSUpWRWUykpNaSbAhyClf7_n'
+NORM_LINK='https://drive.google.com/uc?export=download&id=1mvo3ENkBvEAOyWG6ejApzQTPDLX5yBKU'
+FVG_LINK='https://drive.google.com/uc?export=download&id=13Fhruuj-vEEo1WM138ahtAYqfHc7LsaZ'
+SCALED_LINK='https://drive.google.com/uc?export=download&id=18TK8us235LWNajarWDBAtASUXMYAxvw0'
+PCA_LINK='https://drive.google.com/uc?export=download&id=1gf3BTB4dygDsom1TzjsBfgZnZepcoG5c'
+NEIGHBOURS_LINK='https://drive.google.com/uc?export=download&id=1N2lHoKRBZ7pmAYGfghLWB9KUrLA5WoNX'
+CLUSTERS_LINK='https://drive.google.com/uc?export=download&id=1HWxZWHbNUNo4z__9PhhL_CJOLzec_ETa'
+TSNE_LINK='https://drive.google.com/uc?export=download&id=1qsvMr_GkCSp1dyTJt1BZ6cElJwFFX2zO'
+MARKERS_LINK='https://drive.google.com/uc?export=download&id=18OmWNc7mF-4pzH6DQkPp1eKunN4BfvxD'
+
+LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX'
+H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz'
+
+H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp'
+SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe'
+
+# Seurat v4 exclusives
+IFNB_BASE_FILE='ifnb_'
+
+IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL'
+IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54'
+
+## Classify and UMAP mapping
+CLASSIFY_QUERY_LINK='https://oc.ebi.ac.uk/s/MlEDILFYRrvkS6E/download'
+CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i'
+# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download'
+UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download'
+
+
+# Download URL $1 to file $2 unless $2 already exists.
+# wget -O creates the target even when the transfer fails, so a failed
+# download is removed; otherwise the next run would treat it as cached.
+function get_data {
+  local link=$1
+  local fname=$2
+
+  if [ ! -f "$fname" ]; then
+    echo "$fname not available locally, downloading.."
+    if ! wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link"; then
+      echo "Failed to download $fname from $link" >&2
+      rm -f "$fname"
+      return 1
+    fi
+  fi
+}
+
+# get matrix data
+mkdir -p test-data
+pushd test-data || exit 1
+get_data "$MTX_LINK" mtx.zip
+unzip mtx.zip
+rm -f mtx.zip
+
+get_data "$RDS_LINK" "$BASENAME_FILE.rds"
+get_data "$NORM_LINK" "$BASENAME_FILE-normalised.rds"
+get_data "$FVG_LINK" "$BASENAME_FILE-fvg.rds"
+get_data "$SCALED_LINK" "$BASENAME_FILE-scaled.rds"
+get_data "$PCA_LINK" "$BASENAME_FILE-pca.rds"
+get_data "$NEIGHBOURS_LINK" "$BASENAME_FILE-neighbours.rds"
+get_data "$CLUSTERS_LINK" "$BASENAME_FILE-clusters.rds"
+get_data "$TSNE_LINK" "$BASENAME_FILE-tsne.rds"
+get_data "$MARKERS_LINK" "$BASENAME_FILE-markers.csv.zip"
+
+unzip "$BASENAME_FILE-markers.csv.zip"
+rm -f "$BASENAME_FILE-markers.csv.zip"
+
+get_data "$LOOM_LINK" "${BASENAME_FILE}_loom.h5"
+get_data "$SCE_LINK" "${BASENAME_FILE}_sce.rds"
+get_data "$H5AD_LINK" "$BASENAME_FILE.h5ad"
+
+get_data "$H5AD_SC182_LINK" "${BASENAME_FILE}_sc182.h5ad"
+
+get_data "$IFNB_CTRL_INT_LINK" "${IFNB_BASE_FILE}ctrl_norm_fvg.rds"
+get_data "$IFNB_STIM_INT_LINK" "${IFNB_BASE_FILE}stim_norm_fvg.rds"
+
+get_data "$CLASSIFY_QUERY_LINK" "Classify_query.rds"
+get_data "$CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK" "Classify_anchors.rds"
+get_data "$UMAP_RESULT_OBJECT_LINK" "UMAP_result_integrated.rds"
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/seurat-scale-data.R Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,176 @@
+#!/usr/bin/env Rscript
+
+# Scales and centers the data of a Seurat object with Seurat's ScaleData() and
+# writes the result; --input-format / --output-format select the file formats.
+
+# Load optparse we need to check inputs
+
+suppressPackageStartupMessages(require(optparse))
+
+# Load common functions
+
+suppressPackageStartupMessages(require(workflowscriptscommon))
+
+# parse options
+
+option_list = list(
+  make_option(
+    c("-i", "--input-object-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "File name in which a serialized R matrix object may be found."
+  ),
+  make_option(
+    c("--input-format"),
+    action = "store",
+    default = "seurat",
+    type = 'character',
+    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
+  ),
+  make_option(
+    c("--output-format"),
+    action = "store",
+    default = "seurat",
+    type = 'character',
+    help = "Either loom, seurat, anndata or singlecellexperiment for the output format."
+  ),
+  make_option(
+    c("-e", "--genes-use"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "File with gene names to scale/center (one gene per line). Default is all genes in object@data."
+  ),
+  make_option(
+    c("-v", "--vars-to-regress"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = "Comma-separated list of variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito."
+  ),
+  make_option(
+    c("-m", "--model-use"),
+    action = "store",
+    default = 'linear',
+    type = 'character',
+    help = "Use a linear model or generalized linear model (poisson, negative binomial) for the regression. Options are 'linear' (default), 'poisson', and 'negbinom'."
+  ),
+  make_option(
+    c("-u", "--use-umi"),
+    action = "store",
+    default = FALSE,
+    type = 'logical',
+    help = "Regress on UMI count data. Default is FALSE for linear modeling, but automatically set to TRUE if model.use is 'negbinom' or 'poisson'."
+  ),
+  make_option(
+    c("-s", "--do-not-scale"),
+    action = "store_true",
+    default = FALSE,
+    type = 'logical',
+    help = "Skip the data scale."
+  ),
+  make_option(
+    c("-c", "--do-not-center"),
+    action = "store_true",
+    default = FALSE,
+    type = 'logical',
+    help = "Skip data centering."
+  ),
+  make_option(
+    c("-x", "--scale-max"),
+    action = "store",
+    default = 10,
+    type = 'double',
+    help = "Max value to return for scaled data. The default is 10. Setting this can help reduce the effects of genes that are only expressed in a very small number of cells. If regressing out latent variables and using a non-linear model, the default is 50."
+  ),
+  make_option(
+    c("-b", "--block-size"),
+    action = "store",
+    default = 1000,
+    type = 'integer',
+    help = "Default size for number of genes to scale at in a single computation. Increasing block.size may speed up calculations but at an additional memory cost."
+  ),
+  make_option(
+    c("-d", "--min-cells-to-block"),
+    action = "store",
+    default = 1000,
+    type = 'integer',
+    help = "If object contains fewer than this number of cells, don't block for scaling calculations."
+  ),
+  make_option(
+    c("-n", "--check-for-norm"),
+    action = "store",
+    default = TRUE,
+    type = 'logical',
+    help = "Check to see if data has been normalized, if not, output a warning (TRUE by default)."
+  ),
+  make_option(
+    c("-o", "--output-object-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "File name in which to store serialized R object of type 'Seurat'.'"
+  )
+)
+
+opt <- wsc_parse_args(option_list, mandatory = c('input_object_file', 'output_object_file'))
+
+# Check parameter values
+
+if ( ! file.exists(opt$input_object_file)){
+  stop((paste('File', opt$input_object_file, 'does not exist')))
+}
+
+if (! is.null(opt$genes_use)){
+  if (! file.exists(opt$genes_use)){
+    stop((paste('Supplied genes file', opt$genes_use, 'does not exist')))
+  }else{
+    genes_use <- readLines(opt$genes_use)
+  }
+}else{
+  genes_use <- NULL
+}
+
+# break up opt$vars_to_regress into a vector if it has commas; it is left as
+# NULL when the option was not supplied, because strsplit() rejects NULL input
+if (!is.null(opt$vars_to_regress)) {
+  opt$vars_to_regress <- trimws(unlist(strsplit(opt$vars_to_regress, ",")))
+}
+
+# Now we're happy with the arguments, load Seurat and do the work
+
+suppressPackageStartupMessages(require(Seurat))
+# Load each optional backend independently: a loom input combined with a
+# singlecellexperiment output (or vice versa) needs both packages.
+if (opt$input_format == "loom" || opt$output_format == "loom") {
+  suppressPackageStartupMessages(require(SeuratDisk))
+}
+if (opt$input_format == "singlecellexperiment" || opt$output_format == "singlecellexperiment") {
+  suppressPackageStartupMessages(require(scater))
+}
+
+# Input from serialized R object
+
+seurat_object <- read_seurat4_object(input_path = opt$input_object_file, format = opt$input_format)
+# https://stackoverflow.com/questions/9129673/passing-list-of-named-parameters-to-function
+# might be useful
+# NOTE(review): --check-for-norm is parsed above but is not passed to ScaleData
+# here; confirm whether the option is still needed.
+scaled_seurat_object <- ScaleData(seurat_object,
+                                  features = genes_use,
+                                  vars.to.regress = opt$vars_to_regress,
+                                  model.use = opt$model_use,
+                                  use.umi = opt$use_umi,
+                                  do.scale = !opt$do_not_scale,
+                                  do.center = !opt$do_not_center,
+                                  scale.max = opt$scale_max,
+                                  block.size = opt$block_size,
+                                  min.cells.to.block = opt$min_cells_to_block,
+                                  verbose = FALSE)
+
+
+# Output to a serialized R object
+write_seurat4_object(seurat_object = scaled_seurat_object,
+                     output_path = opt$output_object_file,
+                     format = opt$output_format)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_macros.xml Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,364 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@VERSION@">4.0.0</token>
+    <token name="@SEURAT_VERSION@">4.0.4</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">seurat-scripts</requirement>
+        </requirements>
+    </xml>
+    <!-- Prints the R version followed by the version of the loaded Seurat package.
+         R package names are case-sensitive, so the package is loaded as "Seurat". -->
+    <xml name="version">
+        <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(Seurat); cat(sessionInfo()\$otherPkgs\$Seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+        ]]></version_command>
+    </xml>
+
+    <!-- Declares a conditional (named by the varname token) with a format selector and
+         one data param per format, named <format>_file. The multiple and optional
+         tokens are forwarded to every data param. -->
+    <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False">
+        <conditional name="@VARNAME@" label="Input format">
+            <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData">
+                <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+                <option value="loom">Loom</option>
+                <option value="h5seurat">Seurat HDF5</option>
+                <option value="anndata">AnnData</option>
+                <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+            </param>
+            <when value="anndata">
+                <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/>
+            </when>
+            <when value="loom">
+                <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/>
+            </when>
+            <when value="rds_seurat">
+                <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/>
+            </when>
+            <when value="rds_sce">
+                <param type="data" name="rds_sce_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/>
+            </when>
+            <when value="h5seurat">
+                <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME@" format="h5"/>
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Single-object tokens. Each *_OBJ_PREAMBLE token links (or, for AnnData, copies)
+         the selected dataset to a fixed file name in the working directory; the
+         matching *_OBJECT token emits the object-file and format options for it. -->
+    <token name="@INPUT_OBJ_PREAMBLE@">
+        #if $input.format == 'loom'
+            ln -s '$input.loom_file' input.loom;
+        #else if $input.format == 'h5seurat'
+            ln -s '$input.h5seurat_file' input.h5seurat;
+        #else if $input.format == 'anndata'
+            ## it complains when using links for AnnData...
+            cp '$input.anndata_file' input.h5ad;
+        #end if
+    </token>
+
+    <token name="@INPUT_OBJECT@">
+        #if $input.format == "anndata"
+            --input-object-file input.h5ad --input-format anndata
+        #else if $input.format == "loom"
+            --input-object-file input.loom --input-format loom
+        #else if $input.format == "rds_seurat"
+            --input-object-file '$input.rds_seurat_file' --input-format seurat
+        #else if $input.format == "rds_sce"
+            --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment
+        #else if $input.format == "h5seurat"
+            --input-object-file input.h5seurat --input-format h5seurat
+        #end if
+    </token>
+
+    <token name="@QUERY_OBJ_PREAMBLE@">
+        #if $query.format == 'loom'
+            ln -s '$query.loom_file' query.loom;
+        #else if $query.format == 'h5seurat'
+            ln -s '$query.h5seurat_file' query.h5seurat;
+        #else if $query.format == 'anndata'
+            ## it complains when using links for AnnData...
+            cp '$query.anndata_file' query.h5ad;
+        #end if
+    </token>
+
+    <token name="@QUERY_OBJECT@">
+        #if $query.format == "anndata"
+            --query-object-file query.h5ad --query-format anndata
+        #else if $query.format == "loom"
+            --query-object-file query.loom --query-format loom
+        #else if $query.format == "rds_seurat"
+            --query-object-file '$query.rds_seurat_file' --query-format seurat
+        #else if $query.format == "rds_sce"
+            --query-object-file '$query.rds_sce_file' --query-format singlecellexperiment
+        #else if $query.format == "h5seurat"
+            --query-object-file query.h5seurat --query-format h5seurat
+        #end if
+    </token>
+
+    <token name="@ANCHORS_OBJ_PREAMBLE@">
+        #if $anchors.format == 'loom'
+            ln -s '$anchors.loom_file' anchors.loom;
+        #else if $anchors.format == 'h5seurat'
+            ln -s '$anchors.h5seurat_file' anchors.h5seurat;
+        #else if $anchors.format == 'anndata'
+            ## it complains when using links for AnnData...
+            cp '$anchors.anndata_file' anchors.h5ad;
+        #end if
+    </token>
+
+    <token name="@ANCHORS_OBJECT@">
+        #if $anchors.format == "anndata"
+            --anchors-object-file anchors.h5ad --anchors-format anndata
+        #else if $anchors.format == "loom"
+            --anchors-object-file anchors.loom --anchors-format loom
+        #else if $anchors.format == "rds_seurat"
+            --anchors-object-file '$anchors.rds_seurat_file' --anchors-format seurat
+        #else if $anchors.format == "rds_sce"
+            --anchors-object-file '$anchors.rds_sce_file' --anchors-format singlecellexperiment
+        #else if $anchors.format == "h5seurat"
+            --anchors-object-file anchors.h5seurat --anchors-format h5seurat
+        #end if
+    </token>
+
+    <token name="@REFERENCE_OBJ_PREAMBLE@">
+        #if $reference.format == 'loom'
+            ln -s '$reference.loom_file' reference.loom;
+        #else if $reference.format == 'h5seurat'
+            ln -s '$reference.h5seurat_file' reference.h5seurat;
+        #else if $reference.format == 'anndata'
+            ## it complains when using links for AnnData...
+            cp '$reference.anndata_file' reference.h5ad;
+        #end if
+    </token>
+
+    <token name="@REFERENCE_OBJECT@">
+        #if $reference.format == "anndata"
+            --reference-object-file reference.h5ad --reference-format anndata
+        #else if $reference.format == "loom"
+            --reference-object-file reference.loom --reference-format loom
+        #else if $reference.format == "rds_seurat"
+            --reference-object-file '$reference.rds_seurat_file' --reference-format seurat
+        #else if $reference.format == "rds_sce"
+            --reference-object-file '$reference.rds_sce_file' --reference-format singlecellexperiment
+        #else if $reference.format == "h5seurat"
+            --reference-object-file reference.h5seurat --reference-format h5seurat
+        #end if
+    </token>
+
+    <!-- Multi-object variants: each dataset is linked/copied as <prefix>.<index>.<ext>
+         and the resulting names are passed as one comma-separated list. -->
+    <token name="@INPUT_OBJS_PREAMBLE@">
+        #if $input.format == 'loom'
+            #for $i, $fh in enumerate($input.loom_file):
+                ln -s '$fh' input.${i}.loom;
+            #end for
+        #else if $input.format == 'h5seurat'
+            #for $i, $fh in enumerate($input.h5seurat_file):
+                ln -s '$fh' input.${i}.h5seurat;
+            #end for
+        #else if $input.format == 'anndata'
+            ## it complains when using links for AnnData...
+            #for $i, $fh in enumerate($input.anndata_file):
+                cp '$fh' input.${i}.h5ad;
+            #end for
+        #end if
+    </token>
+
+    <token name="@INPUT_OBJECTS@">
+        #if $input.format == "anndata"
+            --input-object-files
+            #set file_array = [ "input."+str($i)+".h5ad" for $i, $fh in enumerate($input.anndata_file)]
+            #set files = ",".join($file_array)
+            ${files}
+            --input-format anndata
+        #else if $input.format == "loom"
+            --input-object-files
+            #set file_array = [ "input."+str($i)+".loom" for $i, $fh in enumerate($input.loom_file)]
+            #set files = ",".join($file_array)
+            ${files}
+            --input-format loom
+        #else if $input.format == "rds_seurat"
+            --input-object-files
+            #set file_array = $input.rds_seurat_file
+            #set files = ",".join([ str($fh) for $fh in $file_array ])
+            ${files}
+            --input-format seurat
+        #else if $input.format == "rds_sce"
+            --input-object-files
+            #set file_array = $input.rds_sce_file
+            #set files = ",".join([ str($fh) for $fh in $file_array ])
+            ${files}
+            --input-format singlecellexperiment
+        #else if $input.format == "h5seurat"
+            --input-object-files
+            #set file_array = [ "input."+str($i)+".h5seurat" for $i, $fh in enumerate($input.h5seurat_file)]
+            #set files = ",".join($file_array)
+            ${files}
+            --input-format h5seurat
+        #end if
+    </token>
+
+    <token name="@REFERENCE_OBJS_PREAMBLE@">
+        #if $reference.format == 'loom'
+            #for $i, $fh in enumerate($reference.loom_file):
+                ln -s '$fh' reference.${i}.loom;
+            #end for
+        #else if $reference.format == 'h5seurat'
+            #for $i, $fh in enumerate($reference.h5seurat_file):
+                ln -s '$fh' reference.${i}.h5seurat;
+            #end for
+        #else if $reference.format == 'anndata'
+            ## it complains when using links for AnnData...
+            #for $i, $fh in enumerate($reference.anndata_file):
+                cp '$fh' reference.${i}.h5ad;
+            #end for
+        #end if
+    </token>
+
+    <token name="@REFERENCE_OBJECTS@">
+        #if $reference.format == "anndata" and $reference.anndata_file:
+            --reference-object-files
+            #set file_array = [ "reference."+str($i)+".h5ad" for $i, $fh in enumerate($reference.anndata_file)]
+            #set files = ",".join($file_array)
+            ${files}
+            --reference-format anndata
+        #else if $reference.format == "loom" and $reference.loom_file:
+            --reference-object-files
+            #set file_array = [ "reference."+str($i)+".loom" for $i, $fh in enumerate($reference.loom_file)]
+            #set files = ",".join($file_array)
+            ${files}
+            --reference-format loom
+        #else if $reference.format == "rds_seurat" and $reference.rds_seurat_file:
+            --reference-object-files
+            #set files = ",".join([ str($fh) for $fh in $reference.rds_seurat_file ])
+            ${files}
+            --reference-format seurat
+        #else if $reference.format == "rds_sce" and $reference.rds_sce_file:
+            --reference-object-files
+            #set files = ",".join([ str($fh) for $fh in $reference.rds_sce_file ])
+            ${files}
+            --reference-format singlecellexperiment
+        #else if $reference.format == "h5seurat" and $reference.h5seurat_file:
+            --reference-object-files
+            #set file_array = [ "reference."+str($i)+".h5seurat" for $i, $fh in enumerate($reference.h5seurat_file)]
+            #set files = ",".join($file_array)
+            ${files}
+            --reference-format h5seurat
+        #end if
+    </token>
+
+    <xml name="output_object_params">
+        <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom">
+            <option value="rds_seurat" selected="true">RDS with a Seurat object</option>
+            <option value="anndata">AnnData written by Seurat</option>
+            <option value="loom">Loom</option>
+            <option value="rds_sce">RDS with a Single Cell Experiment object</option>
+        </param>
+    </xml>
+
+    <xml name="output_files">
+        <data name="loom_file" from_work_dir="seurat_obj.loom" format="h5" label="${tool.name} on ${on_string}: Seurat Loom">
+            <filter>format == 'loom'</filter>
+        </data>
+        <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS">
+            <filter>format == 'rds_seurat'</filter>
+        </data>
+        <data name="anndata_file" format="h5ad" label="${tool.name} on ${on_string}: AnnData from Seurat">
+            <filter>format == 'anndata'</filter>
+        </data>
+        <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS">
+            <filter>format == 'rds_sce'</filter>
+        </data>
+    </xml>
+
+    <token name="@OUTPUT_OBJECT@">
+        #if $format == "anndata"
+            --output-object-file '$anndata_file' --output-format anndata
+        #else if $format == "loom"
+            --output-object-file seurat_obj.loom --output-format loom
+        #else if $format == "rds_seurat"
+            --output-object-file '$rds_seurat_file' --output-format seurat
+        #else if $format == "rds_sce"
+            --output-object-file '$rds_sce_file' --output-format singlecellexperiment
+        #end if
+    </token>
+
+    <xml name="plot_output_files_format" token_format="png">
+        <data label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot" name="plot_out_@FORMAT@" format='@FORMAT@' >
+            <filter>plot_format == '@FORMAT@'</filter>
+        </data>
+    </xml>
+
+    <token name="@OUTPUT_PLOT@">
+        #if $plot_format == "png"
+            --plot-out '$plot_out_png'
+        #else if $plot_format == "pdf"
+            --plot-out '$plot_out_pdf'
+        #else if $plot_format == "eps"
+            --plot-out '$plot_out_eps'
+        #else if $plot_format == "ps"
+            --plot-out '$plot_out_ps'
+        #else if $plot_format == "jpg"
+            --plot-out '$plot_out_jpg'
+        #else if $plot_format == "tiff"
+            --plot-out '$plot_out_tiff'
+        #else if $plot_format == "svg"
+            --plot-out '$plot_out_svg'
+        #end if
+    </token>
+
+    <xml name="genes-use-input">
+        <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/>
+    </xml>
+    <xml name="dims-use-input">
+        <param name="dims_use" argument="--dims-use" min="1" optional="true" type="integer" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/>
+    </xml>
+
+    <token name="@SEURAT_INTRO@"><![CDATA[
+Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
+It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
+interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
+types of single cell data.
+    ]]></token>
+
+    <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca.
+
+3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduce convert method, improving format interconversion support.
+
+3.1.2_0.0.8: Update metadata parsing
+
+3.1.1_0.0.7: Exposes perplexity and enables tab input.
+
+3.1.1_0.0.6+galaxy0: Moved to Seurat 3.
+
+  Find clusters: removed dims-use, k-param, prune-snn.
+
+2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at
+EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l).
+
+0.0.1: Initial contribution. Maria Doyle (GitHub mblue9).
+    ]]></token>
+
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-021-01102-w</citation>
+            <citation type="doi">10.1038/nbt.4096</citation>
+            <citation type="bibtex">
+                @misc{r-seurat-scripts.git,
+                author = {Jonathan Manning and Pablo Moreno and {EBI Gene Expression Team}},
+                year = {2018},
+                title = {Seurat-scripts: command line interface for Seurat},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/ebi-gene-expression-group/r-seurat-scripts.git},
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_select_integration_features.xml Sat Mar 02 10:40:57 2024 +0000
@@ -0,0 +1,90 @@
+<tool id="seurat_select_integration_features" name="Seurat select integration features" profile="18.01" version="@SEURAT_VERSION@+galaxy0">
+    <description>from a list of studies</description>
+    <macros>
+        <import>seurat_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version" />
+    <command detect_errors="exit_code"><![CDATA[
+    @INPUT_OBJS_PREAMBLE@
+    seurat-select-integration-features.R
+    @INPUT_OBJECTS@
+
+    ## nfeatures is optional, so it is only passed when set (same guard as fvf_nfeatures below)
+    #if $nfeatures
+    --nfeatures '$nfeatures'
+    #end if
+
+
+    #if $assay_list
+    --assay-list '$assay_list'
+    #end if
+
+    $verbose
+
+
+    #if $fvf_nfeatures
+    --fvf-nfeatures '$fvf_nfeatures'
+    #end if
+
+
+    --file-out '$file_out'
+
+
+    ]]></command>
+    <inputs>
+        <expand macro="input_object_params" multiple="True" />
+        <param label="Number of features" optional='true' value='2000' name="nfeatures" argument="--nfeatures" type="integer" help="Number of features to return"/>
+        <param label="Assay-list" optional='true' name="assay_list" argument="--assay-list" type="text" help="Name or vector of assay names (one for each object) from which to pull the variable features."/>
+        <param label="Verbose" optional='true' value='true' name="verbose" argument="--do-not-verbose" type="boolean" truevalue='' falsevalue='--do-not-verbose' checked='true' help="Print messages"/>
+        <param label="Fvf nfeatures" optional='true' value='2000' name="fvf_nfeatures" argument="--fvf-nfeatures" type="integer" help="nfeatures for FindVariableFeatures. Used if VariableFeatures have not been set for any object in input."/>
+
+    </inputs>
+    <outputs>
+        <data label="${tool.name} on ${on_string}: Rdata file with features" name="file_out" format='rdata' />
+    </outputs>
+    <tests>
+        <!-- MANUAL TESTS -->
+        <test>
+            <param name="rds_seurat_file" ftype="rdata" value="ifnb_ctrl_norm_fvg.rds,ifnb_stim_norm_fvg.rds"/>
+            <output name="file_out" ftype="rdata" >
+                <assert_contents>
+                    <has_size value="9506" delta="950"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- END MANUAL TESTS -->
+    </tests>
+<help>
+<!-- MANUAL HELP -->
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@SEURAT_INTRO@
+
+This tool aims to facilitate the first steps of the Seurat 4.0.4 https://satijalab.org/seurat/articles/integration_introduction.html
+tutorial, to select features for integration of different datasets. These features can then be used with the integration tool.
+
+All options are documented in-line.
+
+-----
+
+**Inputs**
+
+* A set of Seurat objects (can be given in other formats as well) to integrate. These objects should be at least normalised and have the find variable genes/features method applied.
+* All other inputs are optional (see above).
+
+-----
+
+**Outputs**
+
+* An RData file with the features selected for integration.
+
+@VERSION_HISTORY@
+]]>
+<!-- END MANUAL HELP -->
+</help>
+    <expand macro="citations" />
+</tool>
