comparison normalize.xml @ 17:5dada6f76047 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author iuc
date Sat, 14 Sep 2024 12:42:55 +0000
parents d844935c906c
children
comparison
equal deleted inserted replaced
16:9f354c7c8c92 17:5dada6f76047
1 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> 1 <tool id="scanpy_normalize" name="Scanpy normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>and impute with scanpy</description> 2 <description>and impute</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements">
8 <requirement type="package" version="3.0.0">magic-impute</requirement>
9 </expand>
8 <expand macro="version_command"/> 10 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
10 @CMD@ 12 @CMD@
11 ]]></command> 13 ]]></command>
12 <configfiles> 14 <configfiles>
13 <configfile name="script_file"><![CDATA[ 15 <configfile name="script_file"><![CDATA[
14 @CMD_imports@ 16 @CMD_IMPORTS@
15 @CMD_read_inputs@ 17 @CMD_READ_INPUTS@
16 18
17 #if $method.method == "pp.normalize_total" 19 #if str($method.method) == 'pp.normalize_total':
18 sc.pp.normalize_total( 20 sc.pp.normalize_total(
19 adata, 21 adata,
20 #if str($method.target_sum) != '' 22 #if str($method.target_sum) != '':
21 target_sum=$method.target_sum, 23 target_sum=$method.target_sum,
22 #end if 24 #end if
23 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, 25 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
24 #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" 26 #if str($method.exclude_highly_expressed.exclude_highly_expressed) == 'True':
25 max_fraction=$method.exclude_highly_expressed.max_fraction, 27 max_fraction=$method.exclude_highly_expressed.max_fraction,
26 #end if 28 #end if
27 #if $method.key_added 29 #if str($method.key_added) != '':
28 key_added='$method.key_added', 30 key_added='$method.key_added',
29 #end if 31 #end if
30 #if $method.layers 32 #if str($method.layer) != '':
31 #if str($method.layers) != 'all' 33 layer='$method.layer',
32 layers[str(x.strip()) for x in str($method.layers).split(',')],
33 #else
34 layers='$method.layers',
35 #end if
36 #end if
37 #if str($method.layer_norm) != "None"
38 layer_norm='$method.layer_norm',
39 #end if 34 #end if
40 inplace=True) 35 inplace=True)
41 36
42 #else if $method.method == "pp.recipe_zheng17" 37 #else if str($method.method) == 'pp.recipe_zheng17':
43 sc.pp.recipe_zheng17( 38 sc.pp.recipe_zheng17(
44 adata=adata, 39 adata=adata,
45 n_top_genes=$method.n_top_genes, 40 n_top_genes=$method.n_top_genes,
46 log=$method.log, 41 log=$method.log,
47 plot=False, 42 plot=False,
48 copy=False) 43 copy=False)
49 44
50 #else if $method.method == "pp.recipe_weinreb17" 45 #else if str($method.method) == 'pp.recipe_weinreb17':
51 sc.pp.recipe_weinreb17( 46 sc.pp.recipe_weinreb17(
52 adata=adata, 47 adata=adata,
53 log=$method.log, 48 log=$method.log,
54 mean_threshold=$method.mean_threshold, 49 mean_threshold=0.01,
55 cv_threshold=$method.cv_threshold, 50 cv_threshold=2,
56 n_pcs=$method.n_pcs, 51 n_pcs=50,
57 svd_solver='$method.svd_solver', 52 svd_solver='randomized',
58 random_state=$method.random_state, 53 random_state=0,
59 copy=False) 54 copy=False)
60 55
61 #else if $method.method == "pp.recipe_seurat" 56 #else if str($method.method) == 'pp.recipe_seurat':
62 sc.pp.recipe_seurat( 57 sc.pp.recipe_seurat(
63 adata=adata, 58 adata=adata,
64 log=$method.log, 59 log=$method.log,
65 plot=False, 60 plot=False,
66 copy=False) 61 copy=False)
67 62
68 #else if $method.method == "external.pp.magic" 63 #else if str($method.method) == 'external.pp.magic':
64 print("stats before magic:", "min=", f"{adata.X.min():.5f}", "max=", f"{adata.X.max():.5f}", "mean=", f"{adata.X.mean():.5f}")
65
69 sc.external.pp.magic( 66 sc.external.pp.magic(
70 adata=adata, 67 adata=adata,
71 name_list='$method.name_list', 68 name_list='$method.name_list',
72 knn=$method.knn, 69 knn=$method.knn,
73 #if str($method.decay) != '' 70 #if str($method.decay) != '':
74 decay=$method.decay, 71 decay=$method.decay,
75 #end if 72 #end if
76 #if str($method.knn_max) != '' 73 #if str($method.knn_max) != '':
77 knn_max=$method.knn_max, 74 knn_max=$method.knn_max,
78 #end if 75 #end if
79 #if $method.t == -1 76 #if $method.t == -1:
80 t='auto', 77 t='auto',
81 #else 78 #else
82 t=$method.t, 79 t=$method.t,
83 #end if 80 #end if
84 #if str($method.n_pca) != '' 81 #if str($method.n_pca) != '':
85 n_pca=$method.n_pca, 82 n_pca=$method.n_pca,
86 #end if 83 #end if
87 solver='$method.solver', 84 solver='$method.solver',
88 knn_dist='$method.knn_dist', 85 knn_dist='$method.knn_dist',
86 #if str($method.random_state) != '':
89 random_state=$method.random_state, 87 random_state=$method.random_state,
88 #else
89 random_state=None,
90 #end if
90 copy=False) 91 copy=False)
92
93 #if str($method.name_list) == 'all_genes':
94 print("stats after magic:", "min=", f"{adata.X.min():.5f}", "max=", f"{adata.X.max():.5f}", "mean=", f"{adata.X.mean():.5f}")
95 #end if
91 #end if 96 #end if
92 97
93 @CMD_anndata_write_outputs@ 98 @CMD_ANNDATA_WRITE_OUTPUTS@
94 99 ]]>
95 ]]></configfile> 100 </configfile>
96 </configfiles> 101 </configfiles>
97 <inputs> 102 <inputs>
98 <expand macro="inputs_anndata"/> 103 <expand macro="inputs_anndata"/>
99 <conditional name="method"> 104 <conditional name="method">
100 <param argument="method" type="select" label="Method used for normalization"> 105 <param argument="method" type="select" label="Method used for normalization">
106 </param> 111 </param>
107 <when value="pp.normalize_total"> 112 <when value="pp.normalize_total">
108 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> 113 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
109 <conditional name="exclude_highly_expressed"> 114 <conditional name="exclude_highly_expressed">
110 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> 115 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
116 <option value="False" selected="true">No</option>
111 <option value="True">Yes</option> 117 <option value="True">Yes</option>
112 <option value="False" selected="true">No</option>
113 </param> 118 </param>
114 <when value="True"> 119 <when value="True">
115 <param argument="max_fraction" type="float" value="0.05" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> 120 <param argument="max_fraction" type="float" value="0.05" label="Consider cells as highly expressed that have more counts than this value of the original total counts in at least one cell."/>
116 </when> 121 </when>
117 <when value="False"/> 122 <when value="False"/>
118 </conditional> 123 </conditional>
119 <param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored" help=""> 124 <param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored">
120 <expand macro="sanitize_query" /> 125 <expand macro="sanitize_query"/>
121 </param> 126 </param>
122 <param argument="layers" type="text" value="" optional="true" label="List of layers to normalize" help="'All' will normalize all layers. The list should be comma-separated."> 127 <param argument="layer" type="text" value="" label="Layer to normalize instead of X. If not provided, X is normalized.">
123 <expand macro="sanitize_query" /> 128 <expand macro="sanitize_query"/>
124 </param>
125 <param argument="layer_norm" type="select" label="How to normalize layers?">
126 <option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the median of the total counts (cells) before normalization of the layer.</option>
127 <option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option>
128 <option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option>
129 </param> 129 </param>
130 </when> 130 </when>
131 <when value="pp.recipe_zheng17"> 131 <when value="pp.recipe_zheng17">
132 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/> 132 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep"/>
133 <expand macro="param_log"/> 133 <expand macro="param_log" checked="true"/>
134 </when> 134 </when>
135 <when value="pp.recipe_weinreb17"> 135 <when value="pp.recipe_weinreb17">
136 <expand macro="param_log"/> 136 <expand macro="param_log" checked="true"/>
137 <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
138 <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
139 <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal component" help=""/>
140 <expand macro="svd_solver"/>
141 <expand macro="pca_random_state"/>
142 </when> 137 </when>
143 <when value="pp.recipe_seurat"> 138 <when value="pp.recipe_seurat">
144 <expand macro="param_log"/> 139 <expand macro="param_log" checked="true"/>
145 </when> 140 </when>
146 <when value="external.pp.magic"> 141 <when value="external.pp.magic">
147 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory"> 142 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
148 <option value="all_genes">All genes</option> 143 <option value="all_genes" selected="true">All genes</option>
149 <option value="pca_only">PCA only</option> 144 <option value="pca_only">PCA only</option>
150 </param> 145 </param>
151 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/> 146 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel"/>
152 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" 147 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" help="If not set, alpha decaying kernel is not used"/>
153 help="If not set, alpha decaying kernel is not used" /> 148 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" help="If not set, will be set to 3 * knn"/>
154 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" 149 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data."/>
155 help="If not set, will be set to 3 * knn" />
156 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
157 help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
158 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods" 150 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
159 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." /> 151 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed."/>
160 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory"> 152 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory">
161 <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option> 153 <option value="exact" selected="true">"exact", the implementation described in van Dijk et al. (2018) </option>
162 <option value="approximate">"approximate", is faster that performs imputation in the PCA space and then projects back to the gene space</option> 154 <option value="approximate">"approximate", is faster that performs imputation in the PCA space and then projects back to the gene space</option>
163 </param> 155 </param>
164 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html"> 156 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
165 <expand macro="distance_metric_options"/> 157 <expand macro="distance_metric_options"/>
166 </param> 158 </param>
167 <expand macro="param_random_state"/> 159 <param argument="random_state" type="integer" optional="true" label="Random seed" help="Defaults to the global numpy random number generator."/>
168 </when> 160 </when>
169 </conditional> 161 </conditional>
170 <expand macro="inputs_common_advanced"/> 162 <expand macro="inputs_common_advanced"/>
171 </inputs> 163 </inputs>
172 <outputs> 164 <outputs>
173 <expand macro="anndata_outputs"/> 165 <expand macro="anndata_outputs"/>
174 </outputs> 166 </outputs>
175 <tests> 167 <tests>
176 <test expect_num_outputs="2"> 168
177 <!-- test 1 --> 169 <!-- test 1 -->
178 <param name="adata" value="krumsiek11.h5ad" /> 170 <test expect_num_outputs="2">
171 <param name="adata" value="krumsiek11.h5ad"/>
179 <conditional name="method"> 172 <conditional name="method">
180 <param name="method" value="pp.normalize_total"/> 173 <param name="method" value="pp.normalize_total"/>
181 <conditional name="exclude_highly_expressed">
182 <param name="exclude_highly_expressed" value="False"/>
183 </conditional>
184 <param name="key_added" value="n_counts"/> 174 <param name="key_added" value="n_counts"/>
185 <param name="layers" value="all"/> 175 </conditional>
186 <param name="layer_norm" value="None"/> 176 <section name="advanced_common">
187 </conditional> 177 <param name="show_log" value="true"/>
188 <section name="advanced_common">
189 <param name="show_log" value="true" />
190 </section> 178 </section>
191 <output name="hidden_output"> 179 <output name="hidden_output">
192 <assert_contents> 180 <assert_contents>
193 <has_text_matching expression="sc.pp.normalize_total"/> 181 <has_text_matching expression="sc.pp.normalize_total"/>
194 <has_text_matching expression="exclude_highly_expressed=False"/> 182 <has_text_matching expression="exclude_highly_expressed=False"/>
195 <has_text_matching expression="key_added='n_counts'"/> 183 <has_text_matching expression="key_added='n_counts'"/>
196 <has_text_matching expression="layers='all'"/> 184 </assert_contents>
197 </assert_contents> 185 </output>
198 </output> 186 <output name="anndata_out" ftype="h5ad">
199 <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 187 <assert_contents>
200 </test> 188 <has_h5_keys keys="obs/n_counts"/>
201 <test expect_num_outputs="2"> 189 </assert_contents>
202 <!-- test 2 --> 190 </output>
191 </test>
192
193 <!-- test 2 -->
194 <test expect_num_outputs="2">
203 <param name="adata" value="random-randint.h5ad"/> 195 <param name="adata" value="random-randint.h5ad"/>
204 <conditional name="method"> 196 <conditional name="method">
205 <param name="method" value="pp.recipe_zheng17"/> 197 <param name="method" value="pp.recipe_zheng17"/>
206 <param name="n_top_genes" value="1000"/> 198 </conditional>
207 <param name="log" value="True"/> 199 <section name="advanced_common">
208 </conditional> 200 <param name="show_log" value="true"/>
209 <section name="advanced_common">
210 <param name="show_log" value="true" />
211 </section> 201 </section>
212 <output name="hidden_output"> 202 <output name="hidden_output">
213 <assert_contents> 203 <assert_contents>
214 <has_text_matching expression="sc.pp.recipe_zheng17"/> 204 <has_text_matching expression="sc.pp.recipe_zheng17"/>
215 <has_text_matching expression="n_top_genes=1000"/> 205 <has_text_matching expression="n_top_genes=1000"/>
216 <has_text_matching expression="log=True"/> 206 <has_text_matching expression="log=True"/>
217 </assert_contents> 207 </assert_contents>
218 </output> 208 </output>
219 <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> 209 <output name="anndata_out" ftype="h5ad">
220 </test> 210 <assert_contents>
221 <test expect_num_outputs="2"> 211 <has_h5_keys keys="obs/n_counts_all"/>
222 <!-- test 3 --> 212 <has_h5_keys keys="var/n_counts,var/mean,var/std"/>
223 <param name="adata" value="paul15_subsample.h5ad" /> 213 <has_h5_keys keys="uns/log1p"/>
214 </assert_contents>
215 </output>
216 </test>
217
218 <!-- test 3 -->
219 <test expect_num_outputs="2">
220 <param name="adata" value="paul15_subsample.h5ad"/>
224 <conditional name="method"> 221 <conditional name="method">
225 <param name="method" value="pp.recipe_weinreb17"/> 222 <param name="method" value="pp.recipe_weinreb17"/>
226 <param name="log" value="True"/> 223 </conditional>
227 <param name="mean_threshold" value="0.01"/> 224 <section name="advanced_common">
228 <param name="cv_threshold" value="2.0"/> 225 <param name="show_log" value="true"/>
229 <param name="n_pcs" value="50"/>
230 <param name="svd_solver" value="randomized"/>
231 <param name="random_state" value="0"/>
232 </conditional>
233 <section name="advanced_common">
234 <param name="show_log" value="true" />
235 </section> 226 </section>
236 <output name="hidden_output"> 227 <output name="hidden_output">
237 <assert_contents> 228 <assert_contents>
238 <has_text_matching expression="sc.pp.recipe_weinreb17"/> 229 <has_text_matching expression="sc.pp.recipe_weinreb17"/>
239 <has_text_matching expression="log=True"/> 230 <has_text_matching expression="log=True"/>
240 <has_text_matching expression="mean_threshold=0.01"/> 231 <has_text_matching expression="mean_threshold=0.01"/>
241 <has_text_matching expression="cv_threshold=2.0"/> 232 <has_text_matching expression="cv_threshold=2"/>
242 <has_text_matching expression="n_pcs=50"/> 233 <has_text_matching expression="n_pcs=50"/>
243 <has_text_matching expression="svd_solver='randomized'"/> 234 <has_text_matching expression="svd_solver='randomized'"/>
244 <has_text_matching expression="random_state=0"/> 235 <has_text_matching expression="random_state=0"/>
245 </assert_contents> 236 </assert_contents>
246 </output> 237 </output>
247 <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> 238 <output name="anndata_out" ftype="h5ad">
248 </test> 239 <assert_contents>
249 <test expect_num_outputs="2"> 240 <has_h5_keys keys="uns/log1p"/>
250 <!-- test 4 --> 241 </assert_contents>
251 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> 242 </output>
243 </test>
244
245 <!-- test 4 -->
246 <test expect_num_outputs="2">
247 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad"/>
252 <conditional name="method"> 248 <conditional name="method">
253 <param name="method" value="pp.recipe_seurat"/> 249 <param name="method" value="pp.recipe_seurat"/>
254 <param name="log" value="True"/> 250 </conditional>
255 </conditional> 251 <section name="advanced_common">
256 <section name="advanced_common"> 252 <param name="show_log" value="true"/>
257 <param name="show_log" value="true" />
258 </section> 253 </section>
259 <output name="hidden_output"> 254 <output name="hidden_output">
260 <assert_contents> 255 <assert_contents>
261 <has_text_matching expression="sc.pp.recipe_seurat"/> 256 <has_text_matching expression="sc.pp.recipe_seurat"/>
262 <has_text_matching expression="log=True"/> 257 <has_text_matching expression="log=True"/>
263 </assert_contents> 258 </assert_contents>
264 </output> 259 </output>
265 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> 260 <output name="anndata_out" ftype="h5ad">
266 </test> 261 <assert_contents>
267 <test expect_num_outputs="2"> 262 <has_h5_keys keys="obs/n_genes"/>
268 <!-- test 5 --> 263 <has_h5_keys keys="var/n_cells"/>
269 <param name="adata" value="krumsiek11.h5ad" /> 264 <has_h5_keys keys="uns/log1p"/>
265 </assert_contents>
266 </output>
267 </test>
268
269 <!-- test 5 -->
270 <test expect_num_outputs="2">
271 <param name="adata" value="krumsiek11.h5ad"/>
270 <conditional name="method"> 272 <conditional name="method">
271 <param name="method" value="external.pp.magic"/> 273 <param name="method" value="external.pp.magic"/>
272 <param name="name_list" value="all_genes"/>
273 <param name="t" value="-1"/> 274 <param name="t" value="-1"/>
274 <param name="n_pca" value="5"/> 275 <param name="n_pca" value="5"/>
275 </conditional> 276 </conditional>
276 <section name="advanced_common"> 277 <section name="advanced_common">
277 <param name="show_log" value="true" /> 278 <param name="show_log" value="true"/>
278 </section> 279 </section>
279 <output name="hidden_output"> 280 <output name="hidden_output">
280 <assert_contents> 281 <assert_contents>
281 <has_text_matching expression="external.pp.magic"/> 282 <has_text_matching expression="external.pp.magic"/>
282 <has_text_matching expression="name_list='all_genes'"/> 283 <has_text_matching expression="name_list='all_genes'"/>
283 <has_text_matching expression="t='auto'"/> 284 <has_text_matching expression="t='auto'"/>
284 <has_text_matching expression="n_pca=5"/> 285 <has_text_matching expression="n_pca=5"/>
285 </assert_contents> 286 <has_text_matching expression="stats before magic: min= -0.01630 max= 1.01060 mean= 0.28644"/>
286 </output> 287 <has_text_matching expression="stats after magic: min= -0.00857 max= 1.00546 mean= 0.28645"/>
287 <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 288 </assert_contents>
288 </test> 289 </output>
289 <test expect_num_outputs="2"> 290 <output name="anndata_out" ftype="h5ad">
290 <!-- test 6 --> 291 <assert_contents>
291 <param name="adata" value="krumsiek11.h5ad" /> 292 <has_h5_keys keys="obs/cell_type"/>
293 </assert_contents>
294 </output>
295 </test>
296
297 <!-- test 6 -->
298 <test expect_num_outputs="2">
299 <param name="adata" value="krumsiek11.h5ad"/>
292 <conditional name="method"> 300 <conditional name="method">
293 <param name="method" value="external.pp.magic"/> 301 <param name="method" value="external.pp.magic"/>
294 <param name="name_list" value="pca_only"/> 302 <param name="name_list" value="pca_only"/>
295 <param name="t" value="3"/> 303 <param name="t" value="3"/>
296 <param name="n_pca" value="5"/> 304 <param name="n_pca" value="5"/>
297 </conditional> 305 </conditional>
298 <section name="advanced_common"> 306 <section name="advanced_common">
299 <param name="show_log" value="true" /> 307 <param name="show_log" value="true"/>
300 </section> 308 </section>
301 <output name="hidden_output"> 309 <output name="hidden_output">
302 <assert_contents> 310 <assert_contents>
303 <has_text_matching expression="external.pp.magic"/> 311 <has_text_matching expression="external.pp.magic"/>
304 <has_text_matching expression="name_list='pca_only'"/> 312 <has_text_matching expression="name_list='pca_only'"/>
305 <has_text_matching expression="t=3"/> 313 <has_text_matching expression="t=3"/>
306 <has_text_matching expression="n_pca=5"/> 314 <has_text_matching expression="n_pca=5"/>
307 </assert_contents> 315 </assert_contents>
308 </output> 316 </output>
309 <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 317 <output name="anndata_out" ftype="h5ad">
318 <assert_contents>
319 <has_h5_keys keys="obsm/X_magic"/>
320 </assert_contents>
321 </output>
310 <assert_stdout> 322 <assert_stdout>
311 <has_text text="X_magic"/> 323 <has_text text="X_magic"/>
312 </assert_stdout> 324 </assert_stdout>
313 </test> 325 </test>
314 </tests> 326 </tests>
315 <help><![CDATA[ 327 <help><![CDATA[
316 Normalize total counts per cell (`pp.normalize_per_cell`) 328 Normalize total counts per cell (`pp.normalize_total`)
317 ========================================================= 329 ======================================================
318 330
319 Normalize each cell by total counts over all genes, so that every cell has 331 Normalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.
320 the same total count after normalization.
321 332
322 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. 333 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
323 334
324 More details on the `scanpy documentation 335 More details on the `scanpy documentation
325 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__ 336 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_total.html>`__
326 337
327 338
328 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) 339 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
329 ================================================================================================================== 340 ==================================================================================================================
330 341
367 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`) 378 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
368 ============================================================================================================ 379 ============================================================================================================
369 380
370 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold. 381 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.
371 382
372 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). 383 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018).
373 384
374 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability. 385 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
375 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements. 386 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.
376 387
377 More details on the `scanpy documentation 388 More details on the `scanpy documentation