comparison scanpy-find-variable-genes.xml @ 1:b089f4a55e6b draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 4846776f55931e176f7e77af7c185ec6fec7d142"
author ebi-gxa
date Mon, 16 Sep 2019 08:19:34 -0400
parents 305d0cbe0ffd
children cb007db0857d
comparison
equal deleted inserted replaced
0:305d0cbe0ffd 1:b089f4a55e6b
1 <?xml version="1.0" encoding="utf-8"?> 1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy0"> 2 <tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy0">
3 <description>based on normalised dispersion of expression</description> 3 <description>based on normalised dispersion of expression</description>
4 <macros> 4 <macros>
5 <import>scanpy_macros.xml</import> 5 <import>scanpy_macros2.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 ln -s '${input_obj_file}' input.h5 && 9 ln -s '${input_obj_file}' input.h5 &&
10 PYTHONIOENCODING=utf-8 scanpy-find-variable-genes.py -i input.h5 10 PYTHONIOENCODING=utf-8 scanpy-find-variable-genes
11 -f '${input_format}' 11 --flavor '${method.flavor}'
12 -o output.h5 12 #if $method.flavor == 'seurat'
13 -F '${output_format}' 13 --mean-limits ${method.min_mean} ${method.max_mean}
14 --flavor '${flavor}' 14 --disp-limits ${method.min_disp} ${method.max_disp}
15 -b '${n_bin}' 15 #else
16 #if $parameters 16 --n-top-genes ${method.n_top_gene}
17 #set pars = ','.join([str($p['name']) for $p in $parameters]) 17 #end if
18 -p '${pars}' 18 --n-bins '${n_bin}'
19 #set mins = ','.join([str($p['min']) for $p in $parameters]) 19 ${filter}
20 -l '${mins}' 20 @INPUT_OPTS@
21 #set maxs = ','.join([str($p['max']) for $p in $parameters]) 21 @OUTPUT_OPTS@
22 -j '${maxs}'
23 #end if
24 #if $n_top_gene
25 -n '${n_top_gene}'
26 #end if
27 ]]></command> 22 ]]></command>
28 23
29 <inputs> 24 <inputs>
30 <expand macro="input_object_params"/> 25 <expand macro="input_object_params"/>
31 <expand macro="output_object_params"/> 26 <expand macro="output_object_params"/>
32 <param name="flavor" argument="--flavor" type="select" value="seurat" label="Flavor of computing normalised dispersion"> 27 <conditional name="method">
33 <option value="seurat">Seurat</option> 28 <param name="flavor" argument="--flavor" type="select" label="Flavor of computing normalised dispersion">
34 <option value="cell_ranger">Cell-ranger</option> 29 <option value="seurat" selected="true">Seurat</option>
35 </param> 30 <option value="cell_ranger">Cell-ranger</option>
36 <repeat name="parameters" min="1" title="Parameters used to find variable genes">
37 <param name="name" type="select" label="Name of parameter to filter on">
38 <option value="mean">Mean of expression</option>
39 <option value="disp">Dispersion of expression</option>
40 </param> 31 </param>
41 <param name="min" type="float" value="0" label="Min value"/> 32 <when value="seurat">
42 <param name="max" type="float" value="1e9" label="Max value"/> 33 <param name="min_mean" argument="--min-mean" type="float" min="0" value="0.0125"
43 </repeat> 34 label="Min value for normalised mean expression (in log1p scale)"/>
35 <param name="max_mean" argument="--max-mean" type="float" min="0" value="3"
36 label="Max value for normalised mean expression (in log1p scale)"/>
37 <param name="min_disp" argument="--min-disp" type="float" min="0" value="0.5"
38 label="Min value for dispersion of expression"/>
39 <param name="max_disp" argument="--max-disp" type="float" min="0" value="50"
40 label="Max value for dispersion of expression"/>
41 </when>
42 <when value="cell_ranger">
43 <param name="n_top_gene" argument="--n-top-genes" type="integer" value="2000"
44 label="Number of top variable genes to keep"/>
45 </when>
46 </conditional>
44 <param name="n_bin" argument="--n-bins" type="integer" value="20" label="Number of bins for binning the mean expression"/> 47 <param name="n_bin" argument="--n-bins" type="integer" value="20" label="Number of bins for binning the mean expression"/>
45 <param name="n_top_gene" argument="--n-top-genes" type="integer" optional="true" label="Number of top variable genes to keep"/> 48 <param name="filter" argument="--subset" type="boolean" truevalue="--subset" falsevalue="" checked="false"
49 label="Remove genes not marked as highly variable"/>
46 </inputs> 50 </inputs>
47 51
48 <outputs> 52 <outputs>
49 <data name="output_h5" format="h5" from_work_dir="output.h5" label="${tool.name} on ${on_string}: Variable genes"/> 53 <data name="output_h5" format="h5" from_work_dir="output.h5" label="${tool.name} on ${on_string}: Variable genes"/>
50 </outputs> 54 </outputs>
54 <param name="input_obj_file" value="normalise_data.h5"/> 58 <param name="input_obj_file" value="normalise_data.h5"/>
55 <param name="input_format" value="anndata"/> 59 <param name="input_format" value="anndata"/>
56 <param name="output_format" value="anndata"/> 60 <param name="output_format" value="anndata"/>
57 <param name="flavor" value="seurat"/> 61 <param name="flavor" value="seurat"/>
58 <param name="n_bin" value="20"/> 62 <param name="n_bin" value="20"/>
59 <repeat name="parameters"> 63 <param name="min_mean" value="0.0125"/>
60 <param name="name" value="mean"/> 64 <param name="max_mean" value="3"/>
61 <param name="min" value="0.0125"/> 65 <param name="min_disp" value="0.5"/>
62 <param name="max" value="3"/> 66 <param name="max_disp" value="1e9"/>
63 </repeat>
64 <repeat name="parameters">
65 <param name="name" value="disp"/>
66 <param name="min" value="0.5"/>
67 <param name="max" value="1e9"/>
68 </repeat>
69 <output name="output_h5" file="find_variable_genes.h5" ftype="h5" compare="sim_size"/> 67 <output name="output_h5" file="find_variable_genes.h5" ftype="h5" compare="sim_size"/>
70 </test> 68 </test>
71 </tests> 69 </tests>
72 70
73 <help><![CDATA[ 71 <help><![CDATA[
74 ============================================================ 72 ==============================================================
75 Extract highly variable genes (`pp.filter_genes_dispersion`) 73 Mark highly variable genes (`scanpy.pp.highly_variable_genes`)
76 ============================================================ 74 ==============================================================
77 75
78 Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger. 76 Depending on `flavor`, this reproduces the R-implementations of Seurat or Cell Ranger.
79 77
80 The normalized dispersion is obtained by scaling with the mean and standard 78 The normalized dispersion is obtained by scaling with the mean and standard
81 deviation of the dispersions for genes falling into a given bin for mean 79 deviation of the dispersions for genes falling into a given bin for mean
82 expression of genes. This means that for each bin of mean expression, highly 80 expression of genes. This means that for each bin of mean expression, highly
83 variable genes are selected. 81 variable genes are selected.