Mercurial > repos > ebi-gxa > anndata_ops
comparison anndata_operations.xml @ 22:fe75d2414dc0 draft
"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 8853a954bd9c343b3458479582f1f21c3981fef0-dirty"
author | ebi-gxa |
---|---|
date | Mon, 23 Aug 2021 11:32:48 +0000 |
parents | 53a251c6d991 |
children | 31e5e6d606ef |
comparison
equal
deleted
inserted
replaced
21:93ecc487a7d1 | 22:fe75d2414dc0 |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | 1 <?xml version="1.0" encoding="utf-8"?> |
2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> | 2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@"> |
3 <description>modifies metadata and flags genes</description> | 3 <description>modifies metadata and flags genes</description> |
4 <macros> | 4 <macros> |
5 <import>scanpy_macros2.xml</import> | 5 <import>scanpy_macros2.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
64 #if not $s.keep_original: | 64 #if not $s.keep_original: |
65 del adata.obs['${s.from_obs}'] | 65 del adata.obs['${s.from_obs}'] |
66 #end if | 66 #end if |
67 #end for | 67 #end for |
68 | 68 |
69 #for $i, $s in enumerate($var_modifications) | |
70 adata.var['${s.to_var}'] = adata.var['${s.from_var}'] | |
71 #if not $s.keep_original: | |
72 del adata.var['${s.from_var}'] | |
73 #end if | |
74 #end for | |
75 | |
69 gene_names = getattr(adata.var, gene_name) | 76 gene_names = getattr(adata.var, gene_name) |
70 | 77 |
71 #for $i, $flag in enumerate($gene_flags) | 78 #for $i, $flag in enumerate($gene_flags) |
72 k_cat = gene_names.str.startswith('${flag.startswith}') | 79 k_cat = gene_names.str.startswith('${flag.startswith}') |
73 if k_cat.sum() > 0: | 80 if k_cat.sum() > 0: |
90 #if $copy_x.default and len($copy_x.xlayers) > 0: | 97 #if $copy_x.default and len($copy_x.xlayers) > 0: |
91 #for $i, $x_s in enumerate($copy_x.xlayers): | 98 #for $i, $x_s in enumerate($copy_x.xlayers): |
92 ad_s = sc.read('x_source_${i}.h5') | 99 ad_s = sc.read('x_source_${i}.h5') |
93 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names): | 100 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names): |
94 #set xs=$copy_x.xlayers[$i] | 101 #set xs=$copy_x.xlayers[$i] |
95 if "${xs.dest}" == '': | 102 if "${xs.dest}" == '': |
96 logging.error("%sth destination layer for %sth X source not specified" % ("${i}", "${i}")) | 103 logging.error("%sth destination layer for %sth X source not specified" % ("${i}", "${i}")) |
97 sys.exit(1) | 104 sys.exit(1) |
98 adata.layers["${xs.dest}"] = ad_s.X | 105 adata.layers["${xs.dest}"] = ad_s.X |
99 else: | 106 else: |
100 logging.error("X source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") | 107 logging.error("X source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") |
110 #for $j, $l_key in enumerate($copy_l.layers): | 117 #for $j, $l_key in enumerate($copy_l.layers): |
111 layers_to_copy = (k for k in ad_s.layers.keys() if "${l_key.contains}" in k) | 118 layers_to_copy = (k for k in ad_s.layers.keys() if "${l_key.contains}" in k) |
112 for l_to_copy in layers_to_copy: | 119 for l_to_copy in layers_to_copy: |
113 suffix='' | 120 suffix='' |
114 if l_to_copy in adata.layers: | 121 if l_to_copy in adata.layers: |
115 suffix = "_${i}" | 122 suffix = "_${i}" |
116 | 123 |
117 adata.layers[l_to_copy+suffix] = ad_s.layers[l_to_copy] | 124 adata.layers[l_to_copy+suffix] = ad_s.layers[l_to_copy] |
118 #end for | 125 #end for |
119 else: | 126 else: |
120 logging.error("Layer source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") | 127 logging.error("Layer source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") |
121 sys.exit(1) | 128 sys.exit(1) |
130 #for $j, $o_key in enumerate($copy_o.obs_keys): | 137 #for $j, $o_key in enumerate($copy_o.obs_keys): |
131 keys_to_copy = (k for k in ad_s.obs.keys() if "${o_key.contains}" in k) | 138 keys_to_copy = (k for k in ad_s.obs.keys() if "${o_key.contains}" in k) |
132 for k_to_copy in keys_to_copy: | 139 for k_to_copy in keys_to_copy: |
133 suffix='' | 140 suffix='' |
134 if k_to_copy in adata.obs: | 141 if k_to_copy in adata.obs: |
135 suffix = "_${i}" | 142 suffix = "_${i}" |
136 | 143 |
137 adata.obs[[k_to_copy+suffix]] = ad_s.obs[[k_to_copy]] | 144 adata.obs[[k_to_copy+suffix]] = ad_s.obs[[k_to_copy]] |
138 if k_to_copy in ad_s.uns.keys(): | 145 if k_to_copy in ad_s.uns.keys(): |
139 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy] | 146 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy] |
140 #end for | 147 #end for |
141 else: | 148 else: |
218 </sanitizer> | 225 </sanitizer> |
219 </param> | 226 </param> |
220 <param name="to_obs" type="text" label="New name" help="New name in observations that you want to change"/> | 227 <param name="to_obs" type="text" label="New name" help="New name in observations that you want to change"/> |
221 <param name="keep_original" type="boolean" label="Keep original" help="If activated, it will also keep the original column" checked="false"/> | 228 <param name="keep_original" type="boolean" label="Keep original" help="If activated, it will also keep the original column" checked="false"/> |
222 </repeat> | 229 </repeat> |
230 <repeat name="var_modifications" title="Change field names in AnnData var" min="0"> | |
231 <param name="from_var" type="text" label="Original name" help="Name in var that you want to change"> | |
232 <sanitizer> | |
233 <valid initial="string.printable"/> | |
234 </sanitizer> | |
235 </param> | |
236 <param name="to_var" type="text" label="New name" help="New name in var that you want to change"/> | |
237 <param name="keep_original" type="boolean" label="Keep original" help="If activated, it will also keep the original column" checked="false"/> | |
238 </repeat> | |
223 <param name="gene_symbols_field" value='index' type="text" label="Gene symbols field in AnnData" help="Field inside var.params where the gene symbols are, normally 'index' or 'gene_symbols'"/> | 239 <param name="gene_symbols_field" value='index' type="text" label="Gene symbols field in AnnData" help="Field inside var.params where the gene symbols are, normally 'index' or 'gene_symbols'"/> |
224 <repeat name="gene_flags" title="Flag genes that start with these names"> | 240 <repeat name="gene_flags" title="Flag genes that start with these names"> |
225 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/> | 241 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/> |
226 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/> | 242 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/> |
227 </repeat> | 243 </repeat> |
295 <test> | 311 <test> |
296 <param name="input_obj_file" value="find_cluster.h5"/> | 312 <param name="input_obj_file" value="find_cluster.h5"/> |
297 <param name="input_format" value="anndata"/> | 313 <param name="input_format" value="anndata"/> |
298 <param name="color_by" value="louvain"/> | 314 <param name="color_by" value="louvain"/> |
299 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> | 315 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> |
316 </test> | |
317 <test> | |
318 <param name="input_obj_file" value="anndata_ops.h5"/> | |
319 <param name="from_var" value = "gene_symbols" /> | |
320 <param name="to_var" value = "hello_all" /> | |
321 <output name="output_h5ad" ftype="h5ad"> | |
322 <assert_contents> | |
323 <has_h5_keys keys="var/hello_all" /> | |
324 </assert_contents> | |
325 </output> | |
300 </test> | 326 </test> |
301 <test> | 327 <test> |
302 <param name="input_obj_file" value="find_cluster.h5"/> | 328 <param name="input_obj_file" value="find_cluster.h5"/> |
303 <param name="input_format" value="anndata"/> | 329 <param name="input_format" value="anndata"/> |
304 <conditional name="copy_r"> | 330 <conditional name="copy_r"> |
350 Operations on AnnData objects | 376 Operations on AnnData objects |
351 ============================= | 377 ============================= |
352 | 378 |
353 Performs the following operations: | 379 Performs the following operations: |
354 | 380 |
355 * Change observation fields, mostly for the convenience of downstream processes. Multiple fields can be changed at once. | 381 * Change observation/var fields, mostly for the convenience of downstream processes. Multiple fields can be changed at once. |
356 * Flag genes that start with a certain text: useful for flagging mitochondrial, spikes or other groups of genes. | 382 * Flag genes that start with a certain text: useful for flagging mitochondrial, spikes or other groups of genes. |
357 * For the flags created, calculates qc metrics (pct_<flag>_counts). | 383 * For the flags created, calculates qc metrics (pct_<flag>_counts). |
358 * Calculates `n_genes`, `n_counts` for cells and `n_cells`, `n_counts` for genes. | 384 * Calculates `n_genes`, `n_counts` for cells and `n_cells`, `n_counts` for genes. |
359 * For top <N> genes specified, calculates qc metrics (pct_counts_in_top_<N>_genes). | 385 * For top <N> genes specified, calculates qc metrics (pct_counts_in_top_<N>_genes). |
360 * Copy from a set of compatible AnnData objects (same cells and genes): | 386 * Copy from a set of compatible AnnData objects (same cells and genes): |