Mercurial > repos > iuc > scanpy_normalize
comparison normalize.xml @ 17:5dada6f76047 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author | iuc |
---|---|
date | Sat, 14 Sep 2024 12:42:55 +0000 |
parents | d844935c906c |
children |
comparison
equal
deleted
inserted
replaced
16:9f354c7c8c92 | 17:5dada6f76047 |
---|---|
1 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> | 1 <tool id="scanpy_normalize" name="Scanpy normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
2 <description>and impute with scanpy</description> | 2 <description>and impute</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="bio_tools"/> | 6 <expand macro="bio_tools"/> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"> |
8 <requirement type="package" version="3.0.0">magic-impute</requirement> | |
9 </expand> | |
8 <expand macro="version_command"/> | 10 <expand macro="version_command"/> |
9 <command detect_errors="exit_code"><![CDATA[ | 11 <command detect_errors="exit_code"><![CDATA[ |
10 @CMD@ | 12 @CMD@ |
11 ]]></command> | 13 ]]></command> |
12 <configfiles> | 14 <configfiles> |
13 <configfile name="script_file"><![CDATA[ | 15 <configfile name="script_file"><![CDATA[ |
14 @CMD_imports@ | 16 @CMD_IMPORTS@ |
15 @CMD_read_inputs@ | 17 @CMD_READ_INPUTS@ |
16 | 18 |
17 #if $method.method == "pp.normalize_total" | 19 #if str($method.method) == 'pp.normalize_total': |
18 sc.pp.normalize_total( | 20 sc.pp.normalize_total( |
19 adata, | 21 adata, |
20 #if str($method.target_sum) != '' | 22 #if str($method.target_sum) != '': |
21 target_sum=$method.target_sum, | 23 target_sum=$method.target_sum, |
22 #end if | 24 #end if |
23 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, | 25 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, |
24 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" | 26 #if str($method.exclude_highly_expressed.exclude_highly_expressed) == 'True': |
25 max_fraction=$method.exclude_highly_expressed.max_fraction, | 27 max_fraction=$method.exclude_highly_expressed.max_fraction, |
26 #end if | 28 #end if |
27 #if $method.key_added | 29 #if str($method.key_added) != '': |
28 key_added='$method.key_added', | 30 key_added='$method.key_added', |
29 #end if | 31 #end if |
30 #if $method.layers | 32 #if str($method.layer) != '': |
31 #if str($method.layers) != 'all' | 33 layer='$method.layer', |
32 layers[str(x.strip()) for x in str($method.layers).split(',')], | |
33 #else | |
34 layers='$method.layers', | |
35 #end if | |
36 #end if | |
37 #if str($method.layer_norm) != "None" | |
38 layer_norm='$method.layer_norm', | |
39 #end if | 34 #end if |
40 inplace=True) | 35 inplace=True) |
41 | 36 |
42 #else if $method.method == "pp.recipe_zheng17" | 37 #else if str($method.method) == 'pp.recipe_zheng17': |
43 sc.pp.recipe_zheng17( | 38 sc.pp.recipe_zheng17( |
44 adata=adata, | 39 adata=adata, |
45 n_top_genes=$method.n_top_genes, | 40 n_top_genes=$method.n_top_genes, |
46 log=$method.log, | 41 log=$method.log, |
47 plot=False, | 42 plot=False, |
48 copy=False) | 43 copy=False) |
49 | 44 |
50 #else if $method.method == "pp.recipe_weinreb17" | 45 #else if str($method.method) == 'pp.recipe_weinreb17': |
51 sc.pp.recipe_weinreb17( | 46 sc.pp.recipe_weinreb17( |
52 adata=adata, | 47 adata=adata, |
53 log=$method.log, | 48 log=$method.log, |
54 mean_threshold=$method.mean_threshold, | 49 mean_threshold=0.01, |
55 cv_threshold=$method.cv_threshold, | 50 cv_threshold=2, |
56 n_pcs=$method.n_pcs, | 51 n_pcs=50, |
57 svd_solver='$method.svd_solver', | 52 svd_solver='randomized', |
58 random_state=$method.random_state, | 53 random_state=0, |
59 copy=False) | 54 copy=False) |
60 | 55 |
61 #else if $method.method == "pp.recipe_seurat" | 56 #else if str($method.method) == 'pp.recipe_seurat': |
62 sc.pp.recipe_seurat( | 57 sc.pp.recipe_seurat( |
63 adata=adata, | 58 adata=adata, |
64 log=$method.log, | 59 log=$method.log, |
65 plot=False, | 60 plot=False, |
66 copy=False) | 61 copy=False) |
67 | 62 |
68 #else if $method.method == "external.pp.magic" | 63 #else if str($method.method) == 'external.pp.magic': |
64 print("stats before magic:", "min=", f"{adata.X.min():.5f}", "max=", f"{adata.X.max():.5f}", "mean=", f"{adata.X.mean():.5f}") | |
65 | |
69 sc.external.pp.magic( | 66 sc.external.pp.magic( |
70 adata=adata, | 67 adata=adata, |
71 name_list='$method.name_list', | 68 name_list='$method.name_list', |
72 knn=$method.knn, | 69 knn=$method.knn, |
73 #if str($method.decay) != '' | 70 #if str($method.decay) != '': |
74 decay=$method.decay, | 71 decay=$method.decay, |
75 #end if | 72 #end if |
76 #if str($method.knn_max) != '' | 73 #if str($method.knn_max) != '': |
77 knn_max=$method.knn_max, | 74 knn_max=$method.knn_max, |
78 #end if | 75 #end if |
79 #if $method.t == -1 | 76 #if $method.t == -1: |
80 t='auto', | 77 t='auto', |
81 #else | 78 #else |
82 t=$method.t, | 79 t=$method.t, |
83 #end if | 80 #end if |
84 #if str($method.n_pca) != '' | 81 #if str($method.n_pca) != '': |
85 n_pca=$method.n_pca, | 82 n_pca=$method.n_pca, |
86 #end if | 83 #end if |
87 solver='$method.solver', | 84 solver='$method.solver', |
88 knn_dist='$method.knn_dist', | 85 knn_dist='$method.knn_dist', |
86 #if str($method.random_state) != '': | |
89 random_state=$method.random_state, | 87 random_state=$method.random_state, |
88 #else | |
89 random_state=None, | |
90 #end if | |
90 copy=False) | 91 copy=False) |
92 | |
93 #if str($method.name_list) == 'all_genes': | |
94 print("stats after magic:", "min=", f"{adata.X.min():.5f}", "max=", f"{adata.X.max():.5f}", "mean=", f"{adata.X.mean():.5f}") | |
95 #end if | |
91 #end if | 96 #end if |
92 | 97 |
93 @CMD_anndata_write_outputs@ | 98 @CMD_ANNDATA_WRITE_OUTPUTS@ |
94 | 99 ]]> |
95 ]]></configfile> | 100 </configfile> |
96 </configfiles> | 101 </configfiles> |
97 <inputs> | 102 <inputs> |
98 <expand macro="inputs_anndata"/> | 103 <expand macro="inputs_anndata"/> |
99 <conditional name="method"> | 104 <conditional name="method"> |
100 <param argument="method" type="select" label="Method used for normalization"> | 105 <param argument="method" type="select" label="Method used for normalization"> |
106 </param> | 111 </param> |
107 <when value="pp.normalize_total"> | 112 <when value="pp.normalize_total"> |
108 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> | 113 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> |
109 <conditional name="exclude_highly_expressed"> | 114 <conditional name="exclude_highly_expressed"> |
110 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> | 115 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> |
116 <option value="False" selected="true">No</option> | |
111 <option value="True">Yes</option> | 117 <option value="True">Yes</option> |
112 <option value="False" selected="true">No</option> | |
113 </param> | 118 </param> |
114 <when value="True"> | 119 <when value="True"> |
115 <param argument="max_fraction" type="float" value="0.05" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> | 120 <param argument="max_fraction" type="float" value="0.05" label="Consider cells as highly expressed that have more counts than this value of the original total counts in at least one cell."/> |
116 </when> | 121 </when> |
117 <when value="False"/> | 122 <when value="False"/> |
118 </conditional> | 123 </conditional> |
119 <param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored" help=""> | 124 <param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored"> |
120 <expand macro="sanitize_query" /> | 125 <expand macro="sanitize_query"/> |
121 </param> | 126 </param> |
122 <param argument="layers" type="text" value="" optional="true" label="List of layers to normalize" help="'All' will normalize all layers. The list should be comma-separated."> | 127 <param argument="layer" type="text" value="" label="Layer to normalize instead of X. If not provided, X is normalized."> |
123 <expand macro="sanitize_query" /> | 128 <expand macro="sanitize_query"/> |
124 </param> | |
125 <param argument="layer_norm" type="select" label="How to normalize layers?"> | |
126 <option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the median of the total counts (cells) before normalization of the layer.</option> | |
127 <option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option> | |
128 <option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option> | |
129 </param> | 129 </param> |
130 </when> | 130 </when> |
131 <when value="pp.recipe_zheng17"> | 131 <when value="pp.recipe_zheng17"> |
132 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/> | 132 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep"/> |
133 <expand macro="param_log"/> | 133 <expand macro="param_log" checked="true"/> |
134 </when> | 134 </when> |
135 <when value="pp.recipe_weinreb17"> | 135 <when value="pp.recipe_weinreb17"> |
136 <expand macro="param_log"/> | 136 <expand macro="param_log" checked="true"/> |
137 <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/> | |
138 <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/> | |
139 <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal component" help=""/> | |
140 <expand macro="svd_solver"/> | |
141 <expand macro="pca_random_state"/> | |
142 </when> | 137 </when> |
143 <when value="pp.recipe_seurat"> | 138 <when value="pp.recipe_seurat"> |
144 <expand macro="param_log"/> | 139 <expand macro="param_log" checked="true"/> |
145 </when> | 140 </when> |
146 <when value="external.pp.magic"> | 141 <when value="external.pp.magic"> |
147 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory"> | 142 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory"> |
148 <option value="all_genes">All genes</option> | 143 <option value="all_genes" selected="true">All genes</option> |
149 <option value="pca_only">PCA only</option> | 144 <option value="pca_only">PCA only</option> |
150 </param> | 145 </param> |
151 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/> | 146 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel"/> |
152 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" | 147 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" help="If not set, alpha decaying kernel is not used"/> |
153 help="If not set, alpha decaying kernel is not used" /> | 148 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" help="If not set, will be set to 3 * knn"/> |
154 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" | 149 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data."/> |
155 help="If not set, will be set to 3 * knn" /> | |
156 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" | |
157 help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." /> | |
158 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods" | 150 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods" |
159 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." /> | 151 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed."/> |
160 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory"> | 152 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory"> |
161 <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option> | 153 <option value="exact" selected="true">"exact", the implementation described in van Dijk et al. (2018) </option> |
162 <option value="approximate">"approximate", a faster variant that performs imputation in the PCA space and then projects back to the gene space</option> | 154 <option value="approximate">"approximate", a faster variant that performs imputation in the PCA space and then projects back to the gene space</option> |
163 </param> | 155 </param> |
164 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html"> | 156 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html"> |
165 <expand macro="distance_metric_options"/> | 157 <expand macro="distance_metric_options"/> |
166 </param> | 158 </param> |
167 <expand macro="param_random_state"/> | 159 <param argument="random_state" type="integer" optional="true" label="Random seed" help="Defaults to the global numpy random number generator."/> |
168 </when> | 160 </when> |
169 </conditional> | 161 </conditional> |
170 <expand macro="inputs_common_advanced"/> | 162 <expand macro="inputs_common_advanced"/> |
171 </inputs> | 163 </inputs> |
172 <outputs> | 164 <outputs> |
173 <expand macro="anndata_outputs"/> | 165 <expand macro="anndata_outputs"/> |
174 </outputs> | 166 </outputs> |
175 <tests> | 167 <tests> |
176 <test expect_num_outputs="2"> | 168 |
177 <!-- test 1 --> | 169 <!-- test 1 --> |
178 <param name="adata" value="krumsiek11.h5ad" /> | 170 <test expect_num_outputs="2"> |
171 <param name="adata" value="krumsiek11.h5ad"/> | |
179 <conditional name="method"> | 172 <conditional name="method"> |
180 <param name="method" value="pp.normalize_total"/> | 173 <param name="method" value="pp.normalize_total"/> |
181 <conditional name="exclude_highly_expressed"> | |
182 <param name="exclude_highly_expressed" value="False"/> | |
183 </conditional> | |
184 <param name="key_added" value="n_counts"/> | 174 <param name="key_added" value="n_counts"/> |
185 <param name="layers" value="all"/> | 175 </conditional> |
186 <param name="layer_norm" value="None"/> | 176 <section name="advanced_common"> |
187 </conditional> | 177 <param name="show_log" value="true"/> |
188 <section name="advanced_common"> | |
189 <param name="show_log" value="true" /> | |
190 </section> | 178 </section> |
191 <output name="hidden_output"> | 179 <output name="hidden_output"> |
192 <assert_contents> | 180 <assert_contents> |
193 <has_text_matching expression="sc.pp.normalize_total"/> | 181 <has_text_matching expression="sc.pp.normalize_total"/> |
194 <has_text_matching expression="exclude_highly_expressed=False"/> | 182 <has_text_matching expression="exclude_highly_expressed=False"/> |
195 <has_text_matching expression="key_added='n_counts'"/> | 183 <has_text_matching expression="key_added='n_counts'"/> |
196 <has_text_matching expression="layers='all'"/> | 184 </assert_contents> |
197 </assert_contents> | 185 </output> |
198 </output> | 186 <output name="anndata_out" ftype="h5ad"> |
199 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | 187 <assert_contents> |
200 </test> | 188 <has_h5_keys keys="obs/n_counts"/> |
201 <test expect_num_outputs="2"> | 189 </assert_contents> |
202 <!-- test 2 --> | 190 </output> |
191 </test> | |
192 | |
193 <!-- test 2 --> | |
194 <test expect_num_outputs="2"> | |
203 <param name="adata" value="random-randint.h5ad"/> | 195 <param name="adata" value="random-randint.h5ad"/> |
204 <conditional name="method"> | 196 <conditional name="method"> |
205 <param name="method" value="pp.recipe_zheng17"/> | 197 <param name="method" value="pp.recipe_zheng17"/> |
206 <param name="n_top_genes" value="1000"/> | 198 </conditional> |
207 <param name="log" value="True"/> | 199 <section name="advanced_common"> |
208 </conditional> | 200 <param name="show_log" value="true"/> |
209 <section name="advanced_common"> | |
210 <param name="show_log" value="true" /> | |
211 </section> | 201 </section> |
212 <output name="hidden_output"> | 202 <output name="hidden_output"> |
213 <assert_contents> | 203 <assert_contents> |
214 <has_text_matching expression="sc.pp.recipe_zheng17"/> | 204 <has_text_matching expression="sc.pp.recipe_zheng17"/> |
215 <has_text_matching expression="n_top_genes=1000"/> | 205 <has_text_matching expression="n_top_genes=1000"/> |
216 <has_text_matching expression="log=True"/> | 206 <has_text_matching expression="log=True"/> |
217 </assert_contents> | 207 </assert_contents> |
218 </output> | 208 </output> |
219 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> | 209 <output name="anndata_out" ftype="h5ad"> |
220 </test> | 210 <assert_contents> |
221 <test expect_num_outputs="2"> | 211 <has_h5_keys keys="obs/n_counts_all"/> |
222 <!-- test 3 --> | 212 <has_h5_keys keys="var/n_counts,var/mean,var/std"/> |
223 <param name="adata" value="paul15_subsample.h5ad" /> | 213 <has_h5_keys keys="uns/log1p"/> |
214 </assert_contents> | |
215 </output> | |
216 </test> | |
217 | |
218 <!-- test 3 --> | |
219 <test expect_num_outputs="2"> | |
220 <param name="adata" value="paul15_subsample.h5ad"/> | |
224 <conditional name="method"> | 221 <conditional name="method"> |
225 <param name="method" value="pp.recipe_weinreb17"/> | 222 <param name="method" value="pp.recipe_weinreb17"/> |
226 <param name="log" value="True"/> | 223 </conditional> |
227 <param name="mean_threshold" value="0.01"/> | 224 <section name="advanced_common"> |
228 <param name="cv_threshold" value="2.0"/> | 225 <param name="show_log" value="true"/> |
229 <param name="n_pcs" value="50"/> | |
230 <param name="svd_solver" value="randomized"/> | |
231 <param name="random_state" value="0"/> | |
232 </conditional> | |
233 <section name="advanced_common"> | |
234 <param name="show_log" value="true" /> | |
235 </section> | 226 </section> |
236 <output name="hidden_output"> | 227 <output name="hidden_output"> |
237 <assert_contents> | 228 <assert_contents> |
238 <has_text_matching expression="sc.pp.recipe_weinreb17"/> | 229 <has_text_matching expression="sc.pp.recipe_weinreb17"/> |
239 <has_text_matching expression="log=True"/> | 230 <has_text_matching expression="log=True"/> |
240 <has_text_matching expression="mean_threshold=0.01"/> | 231 <has_text_matching expression="mean_threshold=0.01"/> |
241 <has_text_matching expression="cv_threshold=2.0"/> | 232 <has_text_matching expression="cv_threshold=2"/> |
242 <has_text_matching expression="n_pcs=50"/> | 233 <has_text_matching expression="n_pcs=50"/> |
243 <has_text_matching expression="svd_solver='randomized'"/> | 234 <has_text_matching expression="svd_solver='randomized'"/> |
244 <has_text_matching expression="random_state=0"/> | 235 <has_text_matching expression="random_state=0"/> |
245 </assert_contents> | 236 </assert_contents> |
246 </output> | 237 </output> |
247 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> | 238 <output name="anndata_out" ftype="h5ad"> |
248 </test> | 239 <assert_contents> |
249 <test expect_num_outputs="2"> | 240 <has_h5_keys keys="uns/log1p"/> |
250 <!-- test 4 --> | 241 </assert_contents> |
251 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> | 242 </output> |
243 </test> | |
244 | |
245 <!-- test 4 --> | |
246 <test expect_num_outputs="2"> | |
247 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad"/> | |
252 <conditional name="method"> | 248 <conditional name="method"> |
253 <param name="method" value="pp.recipe_seurat"/> | 249 <param name="method" value="pp.recipe_seurat"/> |
254 <param name="log" value="True"/> | 250 </conditional> |
255 </conditional> | 251 <section name="advanced_common"> |
256 <section name="advanced_common"> | 252 <param name="show_log" value="true"/> |
257 <param name="show_log" value="true" /> | |
258 </section> | 253 </section> |
259 <output name="hidden_output"> | 254 <output name="hidden_output"> |
260 <assert_contents> | 255 <assert_contents> |
261 <has_text_matching expression="sc.pp.recipe_seurat"/> | 256 <has_text_matching expression="sc.pp.recipe_seurat"/> |
262 <has_text_matching expression="log=True"/> | 257 <has_text_matching expression="log=True"/> |
263 </assert_contents> | 258 </assert_contents> |
264 </output> | 259 </output> |
265 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> | 260 <output name="anndata_out" ftype="h5ad"> |
266 </test> | 261 <assert_contents> |
267 <test expect_num_outputs="2"> | 262 <has_h5_keys keys="obs/n_genes"/> |
268 <!-- test 5 --> | 263 <has_h5_keys keys="var/n_cells"/> |
269 <param name="adata" value="krumsiek11.h5ad" /> | 264 <has_h5_keys keys="uns/log1p"/> |
265 </assert_contents> | |
266 </output> | |
267 </test> | |
268 | |
269 <!-- test 5 --> | |
270 <test expect_num_outputs="2"> | |
271 <param name="adata" value="krumsiek11.h5ad"/> | |
270 <conditional name="method"> | 272 <conditional name="method"> |
271 <param name="method" value="external.pp.magic"/> | 273 <param name="method" value="external.pp.magic"/> |
272 <param name="name_list" value="all_genes"/> | |
273 <param name="t" value="-1"/> | 274 <param name="t" value="-1"/> |
274 <param name="n_pca" value="5"/> | 275 <param name="n_pca" value="5"/> |
275 </conditional> | 276 </conditional> |
276 <section name="advanced_common"> | 277 <section name="advanced_common"> |
277 <param name="show_log" value="true" /> | 278 <param name="show_log" value="true"/> |
278 </section> | 279 </section> |
279 <output name="hidden_output"> | 280 <output name="hidden_output"> |
280 <assert_contents> | 281 <assert_contents> |
281 <has_text_matching expression="external.pp.magic"/> | 282 <has_text_matching expression="external.pp.magic"/> |
282 <has_text_matching expression="name_list='all_genes'"/> | 283 <has_text_matching expression="name_list='all_genes'"/> |
283 <has_text_matching expression="t='auto'"/> | 284 <has_text_matching expression="t='auto'"/> |
284 <has_text_matching expression="n_pca=5"/> | 285 <has_text_matching expression="n_pca=5"/> |
285 </assert_contents> | 286 <has_text_matching expression="stats before magic: min= -0.01630 max= 1.01060 mean= 0.28644"/> |
286 </output> | 287 <has_text_matching expression="stats after magic: min= -0.00857 max= 1.00546 mean= 0.28645"/> |
287 <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | 288 </assert_contents> |
288 </test> | 289 </output> |
289 <test expect_num_outputs="2"> | 290 <output name="anndata_out" ftype="h5ad"> |
290 <!-- test 6 --> | 291 <assert_contents> |
291 <param name="adata" value="krumsiek11.h5ad" /> | 292 <has_h5_keys keys="obs/cell_type"/> |
293 </assert_contents> | |
294 </output> | |
295 </test> | |
296 | |
297 <!-- test 6 --> | |
298 <test expect_num_outputs="2"> | |
299 <param name="adata" value="krumsiek11.h5ad"/> | |
292 <conditional name="method"> | 300 <conditional name="method"> |
293 <param name="method" value="external.pp.magic"/> | 301 <param name="method" value="external.pp.magic"/> |
294 <param name="name_list" value="pca_only"/> | 302 <param name="name_list" value="pca_only"/> |
295 <param name="t" value="3"/> | 303 <param name="t" value="3"/> |
296 <param name="n_pca" value="5"/> | 304 <param name="n_pca" value="5"/> |
297 </conditional> | 305 </conditional> |
298 <section name="advanced_common"> | 306 <section name="advanced_common"> |
299 <param name="show_log" value="true" /> | 307 <param name="show_log" value="true"/> |
300 </section> | 308 </section> |
301 <output name="hidden_output"> | 309 <output name="hidden_output"> |
302 <assert_contents> | 310 <assert_contents> |
303 <has_text_matching expression="external.pp.magic"/> | 311 <has_text_matching expression="external.pp.magic"/> |
304 <has_text_matching expression="name_list='pca_only'"/> | 312 <has_text_matching expression="name_list='pca_only'"/> |
305 <has_text_matching expression="t=3"/> | 313 <has_text_matching expression="t=3"/> |
306 <has_text_matching expression="n_pca=5"/> | 314 <has_text_matching expression="n_pca=5"/> |
307 </assert_contents> | 315 </assert_contents> |
308 </output> | 316 </output> |
309 <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | 317 <output name="anndata_out" ftype="h5ad"> |
318 <assert_contents> | |
319 <has_h5_keys keys="obsm/X_magic"/> | |
320 </assert_contents> | |
321 </output> | |
310 <assert_stdout> | 322 <assert_stdout> |
311 <has_text text="X_magic"/> | 323 <has_text text="X_magic"/> |
312 </assert_stdout> | 324 </assert_stdout> |
313 </test> | 325 </test> |
314 </tests> | 326 </tests> |
315 <help><![CDATA[ | 327 <help><![CDATA[ |
316 Normalize total counts per cell (`pp.normalize_per_cell`) | 328 Normalize total counts per cell (`pp.normalize_total`) |
317 ========================================================= | 329 ====================================================== |
318 | 330 |
319 Normalize each cell by total counts over all genes, so that every cell has | 331 Normalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization. |
320 the same total count after normalization. | |
321 | 332 |
322 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. | 333 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. |
323 | 334 |
324 More details on the `scanpy documentation | 335 More details on the `scanpy documentation |
325 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__ | 336 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_total.html>`__ |
326 | 337 |
327 | 338 |
328 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) | 339 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) |
329 ================================================================================================================== | 340 ================================================================================================================== |
330 | 341 |
367 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`) | 378 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`) |
368 ============================================================================================================ | 379 ============================================================================================================ |
369 | 380 |
370 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold. | 381 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold. |
371 | 382 |
372 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). | 383 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). |
373 | 384 |
374 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability. | 385 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability. |
375 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements. | 386 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements. |
376 | 387 |
377 More details on the `scanpy documentation | 388 More details on the `scanpy documentation |