comparison remove_confounders.xml @ 17:b70219da4a96 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author iuc
date Sat, 14 Sep 2024 12:43:38 +0000
parents c65fce500915
children
comparison
equal deleted inserted replaced
16:a39040ae0a38 17:b70219da4a96
1 <tool id="scanpy_remove_confounders" name="Remove confounders" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> 1 <tool id="scanpy_remove_confounders" name="Scanpy remove confounders" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>with scanpy</description> 2 <description>with scanpy</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements">
8 <requirement type="package" version="1.6.0">bbknn</requirement>
9 <requirement type="package" version="0.0.10">harmonypy</requirement>
10 <requirement type="package" version="1.7.4">scanorama</requirement>
11 <requirement type="package" version="0.5.13">pynndescent</requirement>
12 </expand>
8 <command detect_errors="exit_code"><![CDATA[ 13 <command detect_errors="exit_code"><![CDATA[
9 @CMD@ 14 @CMD@
10 ]]></command> 15 ]]></command>
11 <configfiles> 16 <configfiles>
12 <configfile name="script_file"><![CDATA[ 17 <configfile name="script_file"><![CDATA[
13 @CMD_imports@ 18 @CMD_IMPORTS@
14 @CMD_read_inputs@ 19 @CMD_READ_INPUTS@
15 20
16 #if $method.method == "pp.regress_out" 21 #if str($method.method) == 'pp.regress_out':
22 print("stats before regress_out:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
23
24 import os
17 sc.pp.regress_out( 25 sc.pp.regress_out(
18 adata=adata, 26 adata=adata,
27 #if str($method.layer) != '':
28 layer='$method.layer',
29 #end if
19 #set $keys = [str(x.strip()) for x in str($method.keys).split(',')] 30 #set $keys = [str(x.strip()) for x in str($method.keys).split(',')]
20 keys=$keys, 31 keys=$keys,
32 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)),
21 copy=False) 33 copy=False)
22 34
23 #else if $method.method == "pp.mnn_correct" 35 print("stats after regress_out:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
24 #for i, filepath in enumerate($methods.extra_adata) 36
25 adata_$i = ad.read('$filepath') 37 ## This function is commented out because the conda package is not working. Please re-enable it if there is user demand and the conda package has been fixed; otherwise, please remove it in the next update.
26 #end for 38 ## #else if str($method.method) == 'external.pp.mnn_correct':
27 39 ## #if $method.extra_adata:
28 sc.pp.mnn_correct( 40 ## #for i, filepath in enumerate($method.extra_adata)
29 adata, 41 ## adata_$i = sc.read_h5ad('$filepath')
30 #for i, filepath in enumerate($methods.extra_adata) 42 ## #end for
31 adata_$i, 43 ## #end if
32 #end for 44 ## import os
33 #if $methods.var_subset 45 ## corrected = sc.external.pp.mnn_correct(
34 #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')]) 46 ## adata,
35 var_subset=$var_subset, 47 ## #if $method.extra_adata:
36 #end if 48 ## #for i, filepath in enumerate($method.extra_adata)
37 batch_key='$method.batch_key', 49 ## adata_$i,
38 index_unique='$method.index_unique' 50 ## #end for
39 #if $methods.batch_categories 51 ## #end if
40 #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')]) 52 ## #if str($method.var_subset) != '':
41 batch_categories=$batch_categories, 53 ## #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')])
42 #end if 54 ## var_subset=$var_subset,
43 k=$method.k, 55 ## #end if
44 sigma=$method.sigma, 56 ## batch_key='$method.batch_key',
45 cos_norm_in=$method.cos_norm_in, 57 ## index_unique='$method.index_unique',
46 cos_norm_out=$method.cos_norm_out, 58 ## #if str($method.batch_categories) != '':
47 svd_dim=$method.svd_dim, 59 ## #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')])
48 var_adj=$method.var_adj, 60 ## batch_categories=$batch_categories,
49 compute_angle=$method.compute_angle, 61 ## #end if
50 mnn_order='$method.mnn_order', 62 ## k=$method.k,
51 svd_mode='$method.svd_mode', 63 ## sigma=$method.sigma,
52 do_concatenate=True, 64 ## cos_norm_in=$method.cos_norm_in,
53 save_raw=True, 65 ## cos_norm_out=$method.cos_norm_out,
54 n_jobs=\${GALAXY_SLOTS:-4}) 66 ## #if str($method.svd_dim) != '':
55 67 ## svd_dim=$method.svd_dim,
56 #else if $method.method == "pp.combat" 68 ## #end if
69 ## var_adj=$method.var_adj,
70 ## compute_angle=$method.compute_angle,
71 ## #if str($method.mnn_order) != '':
72 ## mnn_order='$method.mnn_order',
73 ## #end if
74 ## #if str($method.svd_mode) != '':
75 ## svd_mode='$method.svd_mode',
76 ## #end if
77 ## do_concatenate=True,
78 ## save_raw=True,
79 ## n_jobs = int(os.getenv("GALAXY_SLOTS", 4)))
80
81 ## adata = corrected[0]
82
83
84 #else if str($method.method) == 'pp.combat':
85 print("stats before combat:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
86
57 sc.pp.combat( 87 sc.pp.combat(
58 adata, 88 adata,
59 key='$method.key', 89 key='$method.key',
90 #if str($method.covariates) != '':
91 #set $covariates = [str(x.strip()) for x in str($method.covariates).split(',')]
92 covariates=$covariates,
93 #end if
60 inplace=True) 94 inplace=True)
61 95
96 print("stats after combat:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
97
98 #else if str($method.method) == 'external.pp.bbknn':
99 sc.external.pp.bbknn(
100 adata,
101 batch_key='$method.batch_key',
102 use_rep='$method.use_rep',
103 #if str($method.approx.approx_method) == 'no':
104 approx=False,
105 #else if str($method.approx.approx_method) == 'annoy':
106 approx=True,
107 use_annoy=True,
108 annoy_n_trees=$method.approx.annoy_n_trees,
109 #else if str($method.approx.approx_method) == 'pyNNDescent':
110 approx=True,
111 use_annoy=False,
112 pynndescent_n_neighbors=$method.approx.pynndescent_n_neighbors,
113 pynndescent_random_state=$method.approx.pynndescent_random_state,
114 #end if
115 metric='$method.metric',
116 neighbors_within_batch=$method.neighbors_within_batch,
117 n_pcs=$method.n_pcs,
118 #if str($method.trim) != '':
119 trim=$method.trim,
120 #end if
121 set_op_mix_ratio=$method.set_op_mix_ratio,
122 local_connectivity=$method.local_connectivity,
123 copy=False)
124
125 #else if str($method.method) == 'external.pp.harmony_integrate':
126 sc.external.pp.harmony_integrate(
127 adata,
128 key='$method.key',
129 basis='$method.basis',
130 adjusted_basis='$method.adjusted_basis',
131 #if str($method.theta) != '':
132 theta=$method.theta,
133 #end if
134 #if str($method.lamb) != '':
135 lamb=$method.lamb,
136 #end if
137 sigma=$method.sigma,
138 #if str($method.nclust) != '':
139 nclust=$method.nclust,
140 #end if
141 tau=$method.tau,
142 block_size=$method.block_size,
143 max_iter_harmony=$method.max_iter_harmony,
144 max_iter_kmeans=$method.max_iter_kmeans,
145 epsilon_cluster=$method.epsilon_cluster,
146 epsilon_harmony=$method.epsilon_harmony)
147
148 #else if str($method.method) == 'external.pp.scanorama_integrate':
149 sc.external.pp.scanorama_integrate(
150 adata,
151 key='$method.key',
152 basis='$method.basis',
153 adjusted_basis='$method.adjusted_basis',
154 knn=$method.knn,
155 sigma=$method.sigma,
156 approx=$method.approx,
157 alpha=$method.alpha,
158 batch_size=$method.batch_size)
62 #end if 159 #end if
63 160
64 @CMD_anndata_write_outputs@ 161 @CMD_ANNDATA_WRITE_OUTPUTS@
65 ]]></configfile> 162 ]]></configfile>
66 </configfiles> 163 </configfiles>
67 <inputs> 164 <inputs>
68 <expand macro="inputs_anndata"/> 165 <expand macro="inputs_anndata"/>
69 <conditional name="method"> 166 <conditional name="method">
70 <param argument="method" type="select" label="Method used for plotting"> 167 <param argument="method" type="select" label="Method used for plotting">
71 <option value="pp.regress_out">Regress out unwanted sources of variation, using 'pp.regress_out'</option> 168 <option value="pp.regress_out">Regress out unwanted sources of variation, using 'pp.regress_out'</option>
72 <option value="pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using 'pp.mnn_correct'</option> 169 <!-- This function is commented out because the conda package is not working. Please re-enable it if there is user demand and the conda package has been fixed; otherwise, please remove it in the next update. -->
170 <!-- <option value="external.pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using 'external.pp.mnn_correct'</option> -->
73 <option value="pp.combat">Correct batch effects with ComBat function, using 'pp.combat'</option> 171 <option value="pp.combat">Correct batch effects with ComBat function, using 'pp.combat'</option>
172 <option value="external.pp.bbknn">Batch effect removal with Batch balanced KNN (BBKNN), using 'external.pp.bbknn'</option>
173 <option value="external.pp.harmony_integrate">Integrate multiple single-cell experiments with Harmony, using 'external.pp.harmony_integrate'</option>
174 <option value="external.pp.scanorama_integrate">Integrate multiple single-cell experiments with Scanorama, using 'external.pp.scanorama_integrate'</option>
74 </param> 175 </param>
75 <when value="pp.regress_out"> 176 <when value="pp.regress_out">
177 <param argument="layer" type="text" value="" label="Which element of layers to regress on">
178 <expand macro="sanitize_query"/>
179 </param>
76 <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma"> 180 <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma">
77 <expand macro="sanitize_query" /> 181 <expand macro="sanitize_query"/>
78 </param> 182 </param>
79 </when> 183 </when>
80 <when value="pp.mnn_correct"> 184 <!-- This function is commented out because the conda package is not working. Please re-enable it if there is user demand and the conda package has been fixed; otherwise, please remove it in the next update. -->
185 <!-- <when value="external.pp.mnn_correct">
81 <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/> 186 <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/>
82 <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated key from '.var_names'. If not set, all vars are used"> 187 <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated key from '.var_names'. If not set, all vars are used">
83 <expand macro="sanitize_query" /> 188 <expand macro="sanitize_query"/>
84 </param> 189 </param>
85 <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate"> 190 <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate">
86 <expand macro="sanitize_query" /> 191 <expand macro="sanitize_query"/>
87 </param> 192 </param>
88 <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices"> 193 <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices">
89 <option value="-">-</option> 194 <option value="-">-</option>
90 <option value="_">_</option> 195 <option value="_">_</option>
91 <option value=" "> </option> 196 <option value=" "> </option>
92 <option value="/">/</option> 197 <option value="/">/</option>
93 </param> 198 </param>
94 <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated key"> 199 <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated key">
95 <expand macro="sanitize_query" /> 200 <expand macro="sanitize_query"/>
96 </param> 201 </param>
97 <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/> 202 <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/>
98 <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/> 203 <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/>
99 <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/> 204 <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/>
100 <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/> 205 <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/>
101 <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/> 206 <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/>
102 <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/> 207 <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/>
103 <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/> 208 <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/>
104 <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated key. If not set, datas are corrected sequentially"> 209 <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated key. If not set, datas are corrected sequentially">
105 <expand macro="sanitize_query" /> 210 <expand macro="sanitize_query"/>
106 </param> 211 </param>
107 <param name="svd_mode" type="select" label="SVD mode"> 212 <param name="svd_mode" type="select" label="SVD mode">
108 <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option> 213 <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option>
109 <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option> 214 <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option>
110 <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option> 215 <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option>
111 </param> 216 </param>
112 </when> 217 </when> -->
113 <when value="pp.combat"> 218 <when value="pp.combat">
114 <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal"> 219 <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal">
115 <expand macro="sanitize_query" /> 220 <expand macro="sanitize_query"/>
116 </param> 221 </param>
222 <param argument="covariates" type="text" value="" optional="true" label="Additional covariates besides the batch variable such as adjustment variables or biological condition.">
223 <expand macro="sanitize_query"/>
224 </param>
225 </when>
226 <when value="external.pp.bbknn">
227 <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate">
228 <expand macro="sanitize_query"/>
229 </param>
230 <param argument="use_rep" type="text" value="X_pca" label="The dimensionality reduction in .obsm to use for neighbour detection">
231 <expand macro="sanitize_query"/>
232 </param>
233 <conditional name="approx">
234 <param name="approx_method" type="select" label="Approximate neighbour finding">
235 <option value="annoy" selected="true">Yes, using ANNOY algorithm</option>
236 <option value="pyNNDescent">Yes, using pyNNDescent</option>
237 <option value="no">Do not use approximate neighbor finding</option>
238 </param>
239 <when value="annoy">
240 <param argument="annoy_n_trees" type="integer" value="10" label="The number of trees to construct in the annoy forest" help="More trees give higher precision when querying, at the cost of increased run time and resource intensity"/>
241 </when>
242 <when value="pyNNDescent">
243 <param argument="pynndescent_n_neighbors" type="integer" value="30" label="The number of neighbours to include in the approximate neighbour graph" help="More neighbours give higher precision when querying, at the cost of increased run time and resource intensity"/>
244 <param argument="pynndescent_random_state" type="integer" value="0" label="The RNG seed to use when creating the graph"/>
245 </when>
246 <when value="no"/>
247 </conditional>
248 <param name="metric" type="select" label="Distance metric to use for the data">
249 <expand macro="distance_metric_options"/>
250 </param>
251 <param argument="neighbors_within_batch" type="integer" value="3" label="Number of top neighbours to report for each batch" help="total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches. This then serves as the basis for the construction of a symmetrical matrix of connectivities"/>
252 <param argument="n_pcs" type="integer" value="50" label="Number of dimensions to use in the analysis" help="in case of PCA, principal components"/>
253 <param argument="trim" type="integer" value="" optional="true" label="Trim the neighbours of each cell to these many top connectivities" help=" The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If not set, sets the parameter value automatically to 10 times neighbors_within_batch times the number of batches. Set to 0 to skip."/>
254 <param argument="set_op_mix_ratio" type="float" value="1.0" min="0" max="1" label="UMAP connectivity computation parameter" help="controlling the blend between a connectivity matrix formed exclusively from mutual nearest neighbour pairs (0) and a union of all observed neighbour relationships with the mutual pairs emphasised (1)"/>
255 <param argument="local_connectivity" type="integer" value="1" label="Number of nearest neighbors of each cell are assumed to be fully connected"/>
256 </when>
257 <when value="external.pp.harmony_integrate">
258 <param argument="key" type="text" value="batch" label="The name of the column in adata.obs that differentiates among experiments/batches">
259 <expand macro="sanitize_query"/>
260 </param>
261 <param argument="basis" type="text" value="X_pca" label="The name of the field in adata.obsm where the PCA table is stored">
262 <expand macro="sanitize_query"/>
263 </param>
264 <param argument="adjusted_basis" type="text" value="X_pca_harmony" label="The name of the field in adata.obsm where the adjusted PCA table will be stored after running this function">
265 <expand macro="sanitize_query"/>
266 </param>
267 <param argument="theta" type="integer" value="" optional="true" label="Diversity clustering penalty parameter" help="Default theta=2. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters."/>
268 <param argument="lamb" type="integer" value="" optional="true" min="1" label="Ridge regression penalty parameter" help="Default lamb=1. Lambda must be strictly positive. Smaller values result in more aggressive correction."/>
269 <param argument="sigma" type="float" value="0.1" label="Width of soft kmeans clusters" help="Sigma scales the distance from a cell to cluster centroids. Larger values of sigma result in cells assigned to more clusters. Smaller values of sigma make soft kmeans cluster approach hard clustering."/>
270 <param argument="nclust" type="integer" value="" optional="true" label="Number of clusters in model" help="nclust=1 equivalent to simple linear regression."/>
271 <param argument="tau" type="integer" value="0" label="Expected number of cells per cluster" help="Protection against overclustering small datasets with large ones"/>
272 <param argument="block_size" type="float" value="0.05" min="0" max="1" label="Proportion of cells to update during clustering" help="Larger values may be faster but less accurate"/>
273 <param argument="max_iter_harmony" type="integer" value="10" label="Maximum number of rounds to run Harmony" help="One round of Harmony involves one clustering and one correction step"/>
274 <param argument="max_iter_kmeans" type="integer" value="20" label="Maximum number of rounds to run clustering at each round of Harmony"/>
275 <param argument="epsilon_cluster" type="float" value="1e-5" label="Convergence tolerance for clustering round of Harmony"/>
276 <param argument="epsilon_harmony" type="float" value="1e-4" label="Convergence tolerance for Harmony"/>
277 </when>
278 <when value="external.pp.scanorama_integrate">
279 <param argument="key" type="text" value="batch" label="The name of the column in adata.obs that differentiates among experiments/batches">
280 <expand macro="sanitize_query"/>
281 </param>
282 <param argument="basis" type="text" value="X_pca" label="The name of the field in adata.obsm where the PCA table is stored">
283 <expand macro="sanitize_query"/>
284 </param>
285 <param argument="adjusted_basis" type="text" value="X_scanorama" label="The name of the field in adata.obsm where the adjusted PCA table will be stored after running this function">
286 <expand macro="sanitize_query"/>
287 </param>
288 <param argument="knn" type="integer" value="20" label="Number of nearest neighbors to use for matching"/>
289 <param argument="sigma" type="integer" value="15" label="Correction smoothing parameter on Gaussian kernel"/>
290 <param argument="approx" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use approximate nearest neighbors with Python annoy" help="greatly speeds up matching runtime"/>
291 <param argument="alpha" type="float" value="0.1" label="Alignment score minimum cutoff"/>
292 <param argument="batch_size" type="integer" value="5000" label="The batch size used in the alignment vector computation" help="Useful when integrating very large (>100k samples) datasets. Set to large value that runs within available memory."/>
117 </when> 293 </when>
118 </conditional> 294 </conditional>
119 <expand macro="inputs_common_advanced"/> 295 <expand macro="inputs_common_advanced"/>
120 </inputs> 296 </inputs>
121 <outputs> 297 <outputs>
122 <expand macro="anndata_outputs"/> 298 <expand macro="anndata_outputs"/>
123 </outputs> 299 </outputs>
124 <tests> 300 <tests>
301
302 <!-- test 1 -->
125 <test expect_num_outputs="2"> 303 <test expect_num_outputs="2">
126 <!-- test 0 --> 304 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
127 <param name="adata" value="krumsiek11.h5ad" />
128 <conditional name="method"> 305 <conditional name="method">
129 <param name="method" value="pp.regress_out"/> 306 <param name="method" value="pp.regress_out"/>
130 <param name="keys" value="cell_type"/> 307 <param name="keys" value="batch"/>
131 </conditional> 308 </conditional>
132 <section name="advanced_common"> 309 <section name="advanced_common">
133 <param name="show_log" value="true" /> 310 <param name="show_log" value="true"/>
134 </section> 311 </section>
135 <output name="hidden_output"> 312 <output name="hidden_output">
136 <assert_contents> 313 <assert_contents>
137 <has_text_matching expression="sc.pp.regress_out"/> 314 <has_text_matching expression="sc.pp.regress_out"/>
138 <has_text_matching expression="keys=\['cell_type'\]"/> 315 <has_text_matching expression="keys=\['batch'\]"/>
139 </assert_contents> 316 <has_text_matching expression="stats before regress_out: min= -0.0163 max= 1.0106 mean= 0.2864376"/>
140 </output> 317 <has_text_matching expression="stats after regress_out: min= -0.7017021868145134 max= 0.7091581022301392 mean= -1.730938624756494e-16"/>
141 <output name="anndata_out" file="pp.regress_out.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 318 </assert_contents>
319 </output>
320 <output name="anndata_out" ftype="h5ad">
321 <assert_contents>
322 <has_h5_keys keys="varm/PCs"/>
323 </assert_contents>
324 </output>
142 </test> 325 </test>
143 <!--<test expect_num_outputs="2"> 326
144 < test 2 > 327 <!-- This function is commented out because the conda package is not working. Please re-enable it if there is user demand and the conda package has been fixed; otherwise, please remove it in the next update. -->
145 <param name="adata" value="krumsiek11.h5ad" /> 328 <!-- test 2 -->
146 <conditional name="method"> 329 <!-- <test expect_num_outputs="2">
147 <param name="method" value="pp.mnn_correct"/> 330 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
148 <param name="reg_keys" value="cell_type"/> 331 <param name="extra_adata" value="pp.pca.krumsiek11.batch.h5ad"/>
149 </conditional> 332 <conditional name="method">
333 <param name="method" value="external.pp.mnn_correct"/>
334 <param name="batch_key" value="batch"/>
335 </conditional>
336 <section name="advanced_common">
337 <param name="show_log" value="true"/>
338 </section>
150 <assert_stdout> 339 <assert_stdout>
151 <has_text_matching expression="sc.pp.mnn_correct"/> 340 <has_text_matching expression="I_have_to_check"/>
152 <has_text_matching expression="keys='cell_type'"/>
153 </assert_stdout> 341 </assert_stdout>
154 <output name="anndata_out" file="pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 342 <output name="hidden_output">
155 </test>--> 343 <assert_contents>
344 <has_text_matching expression="sc.external.pp.mnn_correct"/>
345 <has_text_matching expression="batch_key='batch'"/>
346 </assert_contents>
347 </output>
348 <output name="anndata_out" file="external.pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
349 </test> -->
350
351 <!-- test 3 -->
156 <test expect_num_outputs="2"> 352 <test expect_num_outputs="2">
157 <!-- test 1 --> 353 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
158 <param name="adata" value="blobs.h5ad" />
159 <conditional name="method"> 354 <conditional name="method">
160 <param name="method" value="pp.combat"/> 355 <param name="method" value="pp.combat"/>
161 <param name="key" value="blobs"/> 356 </conditional>
162 </conditional> 357 <section name="advanced_common">
163 <section name="advanced_common"> 358 <param name="show_log" value="true"/>
164 <param name="show_log" value="true" />
165 </section> 359 </section>
166 <output name="hidden_output"> 360 <output name="hidden_output">
167 <assert_contents> 361 <assert_contents>
168 <has_text_matching expression="sc.pp.combat"/> 362 <has_text_matching expression="sc.pp.combat"/>
169 <has_text_matching expression="key='blobs'"/> 363 <has_text_matching expression="key='batch'"/>
170 </assert_contents> 364 <has_text_matching expression="stats before combat: min= -0.0163 max= 1.0106 mean= 0.2864376"/>
171 </output> 365 <has_text_matching expression="stats after combat: min= -0.07474318799213325 max= 1.2280063438242503 mean= 0.2870530757430964"/>
172 <output name="anndata_out" file="pp.combat.blobs.h5ad" ftype="h5ad" compare="sim_size"/> 366 </assert_contents>
367 </output>
368 <output name="anndata_out" ftype="h5ad">
369 <assert_contents>
370 <has_h5_keys keys="obsm/X_pca"/>
371 </assert_contents>
372 </output>
373 </test>
374
375 <!-- test 4 -->
376 <test expect_num_outputs="2">
377 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
378 <conditional name="method">
379 <param name="method" value="external.pp.bbknn"/>
380 <param name="n_pcs" value="10"/>
381 <param name="set_op_mix_ratio" value="0.5"/>
382 </conditional>
383 <section name="advanced_common">
384 <param name="show_log" value="true"/>
385 </section>
386 <output name="hidden_output">
387 <assert_contents>
388 <has_text_matching expression="external.pp.bbknn"/>
389 <has_text_matching expression="batch_key='batch'"/>
390 <has_text_matching expression="use_rep='X_pca'"/>
391 <has_text_matching expression="use_annoy=True"/>
392 <has_text_matching expression="annoy_n_trees=10"/>
393 <has_text_matching expression="neighbors_within_batch=3"/>
394 <has_text_matching expression="n_pcs=10"/>
395 <has_text_matching expression="set_op_mix_ratio=0.5"/>
396 </assert_contents>
397 </output>
398 <output name="anndata_out" ftype="h5ad">
399 <assert_contents>
400 <has_h5_keys keys="uns/neighbors"/>
401 <has_h5_keys keys="obsp/distances,obsp/connectivities"/>
402 </assert_contents>
403 </output>
404 </test>
405
406 <!-- test 5 -->
407 <test expect_num_outputs="2">
408 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
409 <conditional name="method">
410 <param name="method" value="external.pp.bbknn"/>
411 <conditional name="approx">
412 <param name="approx_method" value="pyNNDescent"/>
413 <param name="pynndescent_n_neighbors" value="10"/>
414 </conditional>
415 <param name="n_pcs" value="10"/>
416 <param name="set_op_mix_ratio" value="0.5"/>
417 </conditional>
418 <section name="advanced_common">
419 <param name="show_log" value="true"/>
420 </section>
421 <output name="hidden_output">
422 <assert_contents>
423 <has_text_matching expression="external.pp.bbknn"/>
424 <has_text_matching expression="batch_key='batch'"/>
425 <has_text_matching expression="use_rep='X_pca'"/>
426 <has_text_matching expression="use_annoy=False"/>
427 <has_text_matching expression="pynndescent_n_neighbors=10"/>
428 <has_text_matching expression="pynndescent_random_state=0"/>
429 <has_text_matching expression="neighbors_within_batch=3"/>
430 <has_text_matching expression="n_pcs=10"/>
431 <has_text_matching expression="set_op_mix_ratio=0.5"/>
432 </assert_contents>
433 </output>
434 <output name="anndata_out" ftype="h5ad">
435 <assert_contents>
436 <has_h5_keys keys="uns/neighbors"/>
437 <has_h5_keys keys="obsp/distances,obsp/connectivities"/>
438 </assert_contents>
439 </output>
440 </test>
441
442 <!-- test 6 -->
443 <test expect_num_outputs="2">
444 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
445 <conditional name="method">
446 <param name="method" value="external.pp.harmony_integrate"/>
447 <param name="theta" value="2"/>
448 <param name="lamb" value="1"/>
449 <param name="block_size" value="0.1"/>
450 </conditional>
451 <section name="advanced_common">
452 <param name="show_log" value="true"/>
453 </section>
454 <output name="hidden_output">
455 <assert_contents>
456 <has_text_matching expression="external.pp.harmony_integrate"/>
457 <has_text_matching expression="key='batch'"/>
458 <has_text_matching expression="basis='X_pca'"/>
459 <has_text_matching expression="adjusted_basis='X_pca_harmony'"/>
460 <has_text_matching expression="theta=2"/>
461 <has_text_matching expression="lamb=1"/>
462 <has_text_matching expression="block_size=0.1"/>
463 </assert_contents>
464 </output>
465 <output name="anndata_out" ftype="h5ad">
466 <assert_contents>
467 <has_h5_keys keys="obsm/X_pca_harmony"/>
468 </assert_contents>
469 </output>
470 </test>
471
472 <!-- test 7 -->
473 <test expect_num_outputs="2">
474 <param name="adata" value="pp.pca.krumsiek11.batch.h5ad"/>
475 <conditional name="method">
476 <param name="method" value="external.pp.scanorama_integrate"/>
477 <param name="knn" value="2"/>
478 <param name="sigma" value="10"/>
479 <param name="batch_size" value="100"/>
480 </conditional>
481 <section name="advanced_common">
482 <param name="show_log" value="true"/>
483 </section>
484 <output name="hidden_output">
485 <assert_contents>
486 <has_text_matching expression="external.pp.scanorama_integrate"/>
487 <has_text_matching expression="key='batch'"/>
488 <has_text_matching expression="basis='X_pca'"/>
489 <has_text_matching expression="adjusted_basis='X_scanorama'"/>
490 <has_text_matching expression="knn=2"/>
491 <has_text_matching expression="sigma=10"/>
492 <has_text_matching expression="batch_size=100"/>
493 </assert_contents>
494 </output>
495 <output name="anndata_out" ftype="h5ad">
496 <assert_contents>
497 <has_h5_keys keys="obsm/X_scanorama"/>
498 </assert_contents>
499 </output>
173 </test> 500 </test>
174 </tests> 501 </tests>
175 <help><![CDATA[ 502 <help><![CDATA[
176 Regress out unwanted sources of variation, using `pp.regress_out` 503 Regress out unwanted sources of variation, using `pp.regress_out`
177 ================================================================= 504 =================================================================
178 505
179 Regress out unwanted sources of variation, using simple linear regression. This is 506 Regress out unwanted sources of variation, using simple linear regression. This is
180 inspired by Seurat's `regressOut` function in R. 507 inspired by Seurat's `regressOut` function in R.
181 508
182 More details on the `scanpy documentation 509 More details on the `scanpy documentation
183 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.regress_out.html>`__ 510 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.regress_out.html>`__
184 511
185 Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct` 512 .. This function is commented out because the conda package is not working. Please add this if there is user demand and the conda package is fixed. If not please remove in the next update.
186 ================================================================================== 513 .. Correct batch effects by matching mutual nearest neighbors, using `external.pp.mnn_correct`
187 514 .. ===========================================================================================
188 This uses the implementation of mnnpy. Depending on do_concatenate, it returns AnnData objects in the 515
189 original order containing corrected expression values or a concatenated matrix or AnnData object. 516 .. This uses the implementation of mnnpy. Depending on do_concatenate, it returns AnnData objects in the
190 517 .. original order containing corrected expression values or a concatenated matrix or AnnData object.
191 Be reminded that it is not advised to use the corrected data matrices for differential expression testing. 518
192 519 .. Be reminded that it is not advised to use the corrected data matrices for differential expression testing.
193 More details on the `scanpy documentation 520
194 <https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.mnn_correct.html>`__ 521 .. More details on the `scanpy documentation
522 .. <https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.mnn_correct.html>`__
195 523
196 524
197 Correct batch effects with ComBat function (`pp.combat`) 525 Correct batch effects with ComBat function (`pp.combat`)
198 ======================================================== 526 ========================================================
199 527
200 Corrects for batch effects by fitting linear models, gains statistical power via an EB framework where information is borrowed across genes. This uses the implementation of ComBat 528 Corrects for batch effects by fitting linear models, gains statistical power via an EB framework where information is borrowed across genes. This uses the implementation of ComBat
201 529
202 More details on the `scanpy documentation 530 More details on the `scanpy documentation
203 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.combat.html>`__ 531 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.combat.html>`__
204 532
533
534 Correct batch effects with bbknn function (`external.pp.bbknn`)
535 ===============================================================
536
537 Batch balanced kNN alters the kNN procedure to identify each cell’s top neighbours in each batch separately instead of the entire cell pool with no accounting for batch. The nearest neighbours for each batch are then merged to create a final list of neighbours for the cell. Aligns batches in a quick and lightweight manner.
538
539 More details on the `scanpy documentation
540 <https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.bbknn.html>`__
541
542
543 Correct batch effects with harmony function (`external.pp.harmony_integrate`)
544 =============================================================================
545
546 Harmony is an algorithm for integrating single-cell data from multiple experiments.
547 As Harmony works by adjusting the principal components, this function should be run after performing PCA but before computing the neighbor graph.
548
549 More details on the `scanpy documentation
550 <https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.harmony_integrate.html>`__
551
552
553 Correct batch effects with Scanorama function (`external.pp.scanorama_integrate`)
554 =================================================================================
555
556 Scanorama is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This function should be run after performing PCA but before computing the neighbor graph.
557
558 More details on the `scanpy documentation
559 <https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.scanorama_integrate.html>`__
205 560
206 ]]></help> 561 ]]></help>
207 <expand macro="citations"/> 562 <expand macro="citations"/>
208 </tool> 563 </tool>