comparison anndata_operations.xml @ 22:fe75d2414dc0 draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 8853a954bd9c343b3458479582f1f21c3981fef0-dirty"
author ebi-gxa
date Mon, 23 Aug 2021 11:32:48 +0000
parents 53a251c6d991
children 31e5e6d606ef
comparison
equal deleted inserted replaced
21:93ecc487a7d1 22:fe75d2414dc0
1 <?xml version="1.0" encoding="utf-8"?> 1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> 2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
3 <description>modifies metadata and flags genes</description> 3 <description>modifies metadata and flags genes</description>
4 <macros> 4 <macros>
5 <import>scanpy_macros2.xml</import> 5 <import>scanpy_macros2.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
64 #if not $s.keep_original: 64 #if not $s.keep_original:
65 del adata.obs['${s.from_obs}'] 65 del adata.obs['${s.from_obs}']
66 #end if 66 #end if
67 #end for 67 #end for
68 68
69 #for $i, $s in enumerate($var_modifications)
70 adata.var['${s.to_var}'] = adata.var['${s.from_var}']
71 #if not $s.keep_original:
72 del adata.var['${s.from_var}']
73 #end if
74 #end for
75
69 gene_names = getattr(adata.var, gene_name) 76 gene_names = getattr(adata.var, gene_name)
70 77
71 #for $i, $flag in enumerate($gene_flags) 78 #for $i, $flag in enumerate($gene_flags)
72 k_cat = gene_names.str.startswith('${flag.startswith}') 79 k_cat = gene_names.str.startswith('${flag.startswith}')
73 if k_cat.sum() > 0: 80 if k_cat.sum() > 0:
90 #if $copy_x.default and len($copy_x.xlayers) > 0: 97 #if $copy_x.default and len($copy_x.xlayers) > 0:
91 #for $i, $x_s in enumerate($copy_x.xlayers): 98 #for $i, $x_s in enumerate($copy_x.xlayers):
92 ad_s = sc.read('x_source_${i}.h5') 99 ad_s = sc.read('x_source_${i}.h5')
93 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names): 100 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
94 #set xs=$copy_x.xlayers[$i] 101 #set xs=$copy_x.xlayers[$i]
95 if "${xs.dest}" == '': 102 if "${xs.dest}" == '':
96 logging.error("%sth destination layer for %sth X source not specified" % ("${i}", "${i}")) 103 logging.error("%sth destination layer for %sth X source not specified" % ("${i}", "${i}"))
97 sys.exit(1) 104 sys.exit(1)
98 adata.layers["${xs.dest}"] = ad_s.X 105 adata.layers["${xs.dest}"] = ad_s.X
99 else: 106 else:
100 logging.error("X source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") 107 logging.error("X source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
110 #for $j, $l_key in enumerate($copy_l.layers): 117 #for $j, $l_key in enumerate($copy_l.layers):
111 layers_to_copy = (k for k in ad_s.layers.keys() if "${l_key.contains}" in k) 118 layers_to_copy = (k for k in ad_s.layers.keys() if "${l_key.contains}" in k)
112 for l_to_copy in layers_to_copy: 119 for l_to_copy in layers_to_copy:
113 suffix='' 120 suffix=''
114 if l_to_copy in adata.layers: 121 if l_to_copy in adata.layers:
115 suffix = "_${i}" 122 suffix = "_${i}"
116 123
117 adata.layers[l_to_copy+suffix] = ad_s.layers[l_to_copy] 124 adata.layers[l_to_copy+suffix] = ad_s.layers[l_to_copy]
118 #end for 125 #end for
119 else: 126 else:
120 logging.error("Layer source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") 127 logging.error("Layer source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
121 sys.exit(1) 128 sys.exit(1)
130 #for $j, $o_key in enumerate($copy_o.obs_keys): 137 #for $j, $o_key in enumerate($copy_o.obs_keys):
131 keys_to_copy = (k for k in ad_s.obs.keys() if "${o_key.contains}" in k) 138 keys_to_copy = (k for k in ad_s.obs.keys() if "${o_key.contains}" in k)
132 for k_to_copy in keys_to_copy: 139 for k_to_copy in keys_to_copy:
133 suffix='' 140 suffix=''
134 if k_to_copy in adata.obs: 141 if k_to_copy in adata.obs:
135 suffix = "_${i}" 142 suffix = "_${i}"
136 143
137 adata.obs[[k_to_copy+suffix]] = ad_s.obs[[k_to_copy]] 144 adata.obs[[k_to_copy+suffix]] = ad_s.obs[[k_to_copy]]
138 if k_to_copy in ad_s.uns.keys(): 145 if k_to_copy in ad_s.uns.keys():
139 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy] 146 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy]
140 #end for 147 #end for
141 else: 148 else:
218 </sanitizer> 225 </sanitizer>
219 </param> 226 </param>
220 <param name="to_obs" type="text" label="New name" help="New name in observations that you want to change"/> 227 <param name="to_obs" type="text" label="New name" help="New name in observations that you want to change"/>
221 <param name="keep_original" type="boolean" label="Keep original" help="If activated, it will also keep the original column" checked="false"/> 228 <param name="keep_original" type="boolean" label="Keep original" help="If activated, it will also keep the original column" checked="false"/>
222 </repeat> 229 </repeat>
230 <repeat name="var_modifications" title="Change field names in AnnData var" min="0">
231 <param name="from_var" type="text" label="Original name" help="Name in var that you want to change">
232 <sanitizer>
233 <valid initial="string.printable"/>
234 </sanitizer>
235 </param>
236 <param name="to_var" type="text" label="New name" help="New name in var that you want to change"/>
237 <param name="keep_original" type="boolean" label="Keep original" help="If activated, it will also keep the original column" checked="false"/>
238 </repeat>
223 <param name="gene_symbols_field" value='index' type="text" label="Gene symbols field in AnnData" help="Field inside var.params where the gene symbols are, normally 'index' or 'gene_symbols'"/> 239 <param name="gene_symbols_field" value='index' type="text" label="Gene symbols field in AnnData" help="Field inside var.params where the gene symbols are, normally 'index' or 'gene_symbols'"/>
224 <repeat name="gene_flags" title="Flag genes that start with these names"> 240 <repeat name="gene_flags" title="Flag genes that start with these names">
225 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/> 241 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/>
226 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/> 242 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/>
227 </repeat> 243 </repeat>
295 <test> 311 <test>
296 <param name="input_obj_file" value="find_cluster.h5"/> 312 <param name="input_obj_file" value="find_cluster.h5"/>
297 <param name="input_format" value="anndata"/> 313 <param name="input_format" value="anndata"/>
298 <param name="color_by" value="louvain"/> 314 <param name="color_by" value="louvain"/>
299 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> 315 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/>
316 </test>
317 <test>
318 <param name="input_obj_file" value="anndata_ops.h5"/>
319 <param name="from_var" value = "gene_symbols" />
320 <param name="to_var" value = "hello_all" />
321 <output name="output_h5ad" ftype="h5ad">
322 <assert_contents>
323 <has_h5_keys keys="var/hello_all" />
324 </assert_contents>
325 </output>
300 </test> 326 </test>
301 <test> 327 <test>
302 <param name="input_obj_file" value="find_cluster.h5"/> 328 <param name="input_obj_file" value="find_cluster.h5"/>
303 <param name="input_format" value="anndata"/> 329 <param name="input_format" value="anndata"/>
304 <conditional name="copy_r"> 330 <conditional name="copy_r">
350 Operations on AnnData objects 376 Operations on AnnData objects
351 ============================= 377 =============================
352 378
353 Performs the following operations: 379 Performs the following operations:
354 380
355 * Change observation fields, mostly for the convenience of downstream processes. Multiple fields can be changed at once. 381 * Change observation/var fields, mostly for the convenience of downstream processes. Multiple fields can be changed at once.
356 * Flag genes that start with a certain text: useful for flagging mitochondrial, spikes or other groups of genes. 382 * Flag genes that start with a certain text: useful for flagging mitochondrial, spikes or other groups of genes.
357 * For the flags created, calculates qc metrics (pct_<flag>_counts). 383 * For the flags created, calculates qc metrics (pct_<flag>_counts).
358 * Calculates `n_genes`, `n_counts` for cells and `n_cells`, `n_counts` for genes. 384 * Calculates `n_genes`, `n_counts` for cells and `n_cells`, `n_counts` for genes.
359 * For top <N> genes specified, calculates qc metrics (pct_counts_in_top_<N>_genes). 385 * For top <N> genes specified, calculates qc metrics (pct_counts_in_top_<N>_genes).
360 * Copy from a set of compatible AnnData objects (same cells and genes): 386 * Copy from a set of compatible AnnData objects (same cells and genes):