comparison anndata_operations.xml @ 9:b8eb50bd397c draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit bcab272f6785d5967b1ec68acf5390ec3ace49fd"
author ebi-gxa
date Mon, 08 Jun 2020 05:08:44 -0400
parents d586ebb8ff43
children 54f1212d26ac
comparison
equal deleted inserted replaced
8:d868221262d5 9:b8eb50bd397c
1 <?xml version="1.0" encoding="utf-8"?> 1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="anndata_ops" name="AnnData Operations" version="0.0.1+galaxy2" profile="@PROFILE@"> 2 <tool id="anndata_ops" name="AnnData Operations" version="0.0.2+galaxy0" profile="@PROFILE@">
3 <description>modifies metadata and flags genes</description> 3 <description>modifies metadata and flags genes</description>
4 <macros> 4 <macros>
5 <import>scanpy_macros2.xml</import> 5 <import>scanpy_macros2.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 ln -s '${input_obj_file}' input.h5 && 9 ln -s '${input_obj_file}' input.h5 &&
10
11 #if $copy_o.default:
12 #for $i, $os in enumerate($copy_o.obs_sources):
13 ln -s '${os}' obs_source_${i}.h5 &&
14 #end for
15 #end if
16
17 #if $copy_e.default:
18 #for $i, $es in enumerate($copy_e.embedding_sources):
19 ln -s '${es}' embedding_source_${i}.h5 &&
20 #end for
21 #end if
10 python $operations 22 python $operations
11 ]]></command> 23 ]]></command>
12 <configfiles> 24 <configfiles>
13 <configfile name="operations"> 25 <configfile name="operations">
14 import scanpy as sc 26 import scanpy as sc
27 import anndata
28 from numpy import all
15 import logging 29 import logging
16 30
17 adata = sc.read('input.h5') 31 adata = sc.read('input.h5')
18 32
19 gene_name = '${gene_symbols_field}' 33 gene_name = '${gene_symbols_field}'
32 qc_vars.append('${flag.flag}') 46 qc_vars.append('${flag.flag}')
33 else: 47 else:
34 logging.warning('No genes starting with {} found, skip calculating expression of {} genes'.format('${flag.startswith}', '${flag.flag}')) 48 logging.warning('No genes starting with {} found, skip calculating expression of {} genes'.format('${flag.startswith}', '${flag.flag}'))
35 #end for 49 #end for
36 50
51 #if $copy_o.default and len($copy_o.obs_keys) > 0:
52 #for $i, $obs_s in enumerate($copy_o.obs_sources):
53 ad_s = sc.read('obs_source_${i}.h5')
54 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
55 #for $j, $o_key in enumerate($copy_o.obs_keys):
56 keys_to_copy = (k for k in ad_s.obs.keys() if "${o_key.contains}" in k)
57 for k_to_copy in keys_to_copy:
58 adata.obs[[k_to_copy+"_${i}"]] = ad_s.obs[[k_to_copy]]
59 if k_to_copy in ad_s.uns.keys():
60 adata.uns[k_to_copy+"_${i}"] = ad_s.uns[k_to_copy]
61 #end for
62 else:
63 logging.warning("Observation source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
64 #end for
65 #end if
66
67
68 #if $copy_e.default and len($copy_e.embedding_keys) > 0:
69 #for $i, $obs_s in enumerate($copy_e.embedding_sources):
70 ad_s = sc.read('embedding_source_${i}.h5')
71 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
72 #for $j, $e_key in enumerate($copy_e.embedding_keys):
73 keys_to_copy = (k for k in ad_s.obsm.keys() if "${e_key.contains}" in k)
74 for k_to_copy in keys_to_copy:
75 adata.obsm[k_to_copy+"_${i}"] = ad_s.obsm[k_to_copy]
76 #end for
77 else:
78 logging.warning("Embedding source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
79 #end for
80 #end if
81
82 #if $sanitize_varm:
83 if hasattr(adata, 'raw') and hasattr(adata.raw, 'X') and hasattr(adata.raw, 'var'):
84 new_ad = anndata.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var)
85 adata.raw = new_ad
86 #end if
37 87
38 if len(qc_vars) > 0: 88 if len(qc_vars) > 0:
39 pct_top = [${top_genes}] 89 pct_top = [${top_genes}]
40 sc.pp.calculate_qc_metrics(adata, qc_vars=qc_vars, percent_top=pct_top, inplace=True) 90 sc.pp.calculate_qc_metrics(adata, qc_vars=qc_vars, percent_top=pct_top, inplace=True)
41 91
67 <repeat name="gene_flags" title="Flag genes that start with these names"> 117 <repeat name="gene_flags" title="Flag genes that start with these names">
68 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/> 118 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/>
69 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/> 119 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/>
70 </repeat> 120 </repeat>
71 <param name="top_genes" label="Number of top genes" value='50' help="to calculate percentage of the flagged genes in that number of top genes. Used by sc.pp.calculate_qc_metrics (integer)." type="integer"/> 121 <param name="top_genes" label="Number of top genes" value='50' help="to calculate percentage of the flagged genes in that number of top genes. Used by sc.pp.calculate_qc_metrics (integer)." type="integer"/>
122 <conditional name="copy_o">
123 <param name="default" type="boolean" checked="false" label="Copy observations (such as clusters)"/>
124 <when value="true">
125 <repeat name="obs_keys" title="Keys from obs to copy" help="will copy all obs keys in the given AnnData object to the main AnnData object. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data. You can use this to copy clusters." min="1">
126 <param name="contains" type="text" label="Key contains" help="Keys to be copied need to contain the text set here."/>
127 </repeat>
128 <param name="obs_sources" type="data" label="AnnData objects with obs to copy" help="Extracts obs (such as clusters) from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/>
129 <!-- <param name="check_o" type="boolean" label="Check compatibility" help="checks if the provided AnnData objects are compatible (same genes and cells) for merging." checked="true"/> -->
130 </when>
131 <when value="false"/>
132 </conditional>
133 <conditional name="copy_e">
134 <param name="default" type="boolean" checked="false" label="Copy embeddings (such as UMAP, tSNE)"/>
135 <when value="true">
136 <repeat name="embedding_keys" title="Keys from embeddings to copy" help="will copy all embedding keys in the given AnnData object to the main AnnData object. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data. You can use this to copy tSNE, UMAP, etc." min="1">
137 <param name="contains" type="text" label="Key contains" help="Keys to be copied need to contain the text set here."/>
138 </repeat>
139 <param name="embedding_sources" type="data" label="AnnData objects with embeddings to copy" help="Extracts embeddings (tSNE, UMAP) from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/>
140 <!-- <param name="check_e" type="boolean" label="Check compatibility" help="checks if the provided AnnData objects are compatible (same genes and cells) for merging." checked="true"/> -->
141 </when>
142 <when value="false"/>
143 </conditional>
144 <param name="sanitize_varm" type="boolean" checked="false" label="Sanitise any null raw.varm objects if any" help="This might be relevant for interfacing with newer versions of AnnData, that might complain if .raw includes a varm null object."/>
72 </inputs> 145 </inputs>
73 146
74 <outputs> 147 <outputs>
75 <expand macro="output_data_obj_no_loom" description="metadata changes on"/> 148 <expand macro="output_data_obj_no_loom" description="metadata changes on"/>
76 </outputs> 149 </outputs>