Mercurial > repos > iuc > scanpy_remove_confounders
diff remove_confounders.xml @ 0:9ca360dde8e3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author | iuc |
---|---|
date | Mon, 04 Mar 2019 10:16:47 -0500 |
parents | |
children | a89ee42625ad |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/remove_confounders.xml Mon Mar 04 10:16:47 2019 -0500
@@ -0,0 +1,216 @@
+<tool id="scanpy_remove_confounders" name="Remove confounders with scanpy" version="@version@">
+    <!-- Wraps three scanpy calls, selected through the "method" conditional:
+         pp.regress_out, tl.score_genes and tl.score_genes_cell_cycle.
+         Macros and tokens not defined below (requirements, citations, the CMD
+         tokens, ...) are expected to come from macros.xml. -->
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+        <!-- Parameters shared by the two gene-scoring methods. -->
+        <xml name="score_genes_params">
+            <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
+            <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
+            <expand macro="param_use_raw"/>
+        </xml>
+        <!-- Keyword arguments matching score_genes_params, appended to both scoring calls. -->
+        <token name="@CMD_score_genes_inputs@"><![CDATA[
+    n_bins=$method.n_bins,
+    random_state=$method.random_state,
+    use_raw=$method.use_raw,
+    copy=False
+        ]]></token>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+@CMD@
+    ]]></command>
+    <configfiles>
+        <!-- Script template: exactly one scanpy call is emitted, depending on $method.method.
+             Comma-separated text inputs are split and stripped into lists of strings. -->
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == "pp.regress_out"
+sc.pp.regress_out(
+    adata=adata,
+    #set $reg_keys = [str(x.strip()) for x in str($method.reg_keys).split(',')]
+    keys=$reg_keys,
+    copy=False)
+#elif $method.method == "tl.score_genes"
+sc.tl.score_genes(
+    adata=adata,
+    #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
+    gene_list=$gene_list,
+    ctrl_size=$method.ctrl_size,
+    score_name='$method.score_name',
+    #if $method.gene_pool
+    #set $gene_pool = [str(x.strip()) for x in str($method.gene_pool).split(',')]
+    gene_pool=$gene_pool,
+    #end if
+    @CMD_score_genes_inputs@)
+adata.obs.to_csv('$obs', sep='\t')
+#elif $method.method == "tl.score_genes_cell_cycle"
+sc.tl.score_genes_cell_cycle(
+    adata=adata,
+    #set $s_genes = [str(x.strip()) for x in str($method.s_genes).split(',')]
+    s_genes=$s_genes,
+    #set $g2m_genes = [str(x.strip()) for x in str($method.g2m_genes).split(',')]
+    g2m_genes=$g2m_genes,
+    @CMD_score_genes_inputs@)
+adata.obs.to_csv('$obs', sep='\t')
+#end if
+
+@CMD_anndata_write_outputs@
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs_anndata"/>
+        <conditional name="method">
+            <param argument="method" type="select" label="Method used to remove confounders">
+                <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option>
+                <!--<option value="pp.mnn_correct">, using `pp.mnn_correct`</option>-->
+                <!--<option value="pp.dca">, using `pp.dca`</option>-->
+                <!--<option value="pp.magic">, using `pp.magic`</option>-->
+                <!--<option value="tl.sim">, using `tl.sim`</option>-->
+                <!--<option value="pp.calculate_qc_metrics">, using `pp.calculate_qc_metrics`</option>-->
+                <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option>
+                <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option>
+                <!--<option value="tl.cyclone">, using `tl.cyclone`</option>-->
+                <!--<option value="tl.andbag">, using `tl.andbag`</option>-->
+            </param>
+            <when value="pp.regress_out">
+                <param argument="reg_keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma"/>
+            </when>
+            <when value="tl.score_genes">
+                <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/>
+                <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
+                    help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/>
+                <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
+                    help="Default is all genes. Genes separated by a comma"/>
+                <expand macro="score_genes_params"/>
+                <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/>
+            </when>
+            <when value="tl.score_genes_cell_cycle">
+                <param name="s_genes" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/>
+                <param name="g2m_genes" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/>
+                <expand macro="score_genes_params"/>
+            </when>
+        </conditional>
+        <expand macro="anndata_output_format"/>
+    </inputs>
+    <outputs>
+        <expand macro="anndata_outputs"/>
+        <!-- The observations table is only written by the two scoring branches of the script. -->
+        <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
+            <filter>method['method'] == 'tl.score_genes' or method['method'] == 'tl.score_genes_cell_cycle'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.regress_out"/>
+                <param name="reg_keys" value="cell_type"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.regress_out"/>
+                <has_text_matching expression="keys=\['cell_type'\]"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.regress_out.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="tl.score_genes"/>
+                <param name="gene_list" value="Gata2, Fog1"/>
+                <param name="ctrl_size" value="2"/>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="2"/>
+                <param name="use_raw" value="False"/>
+                <param name="score_name" value="score"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.score_genes" />
+                <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
+                <has_text_matching expression="ctrl_size=2" />
+                <has_text_matching expression="score_name='score'" />
+                <has_text_matching expression="n_bins=2" />
+                <has_text_matching expression="random_state=2" />
+                <has_text_matching expression="use_raw=False" />
+                <has_text_matching expression="copy=False" />
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="tl.score_genes.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="obs" file="tl.score_genes.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="tl.score_genes_cell_cycle"/>
+                <param name="s_genes" value="Gata2, Fog1, EgrNab"/>
+                <param name="g2m_genes" value="Gata2, Fog1, EgrNab"/>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="1"/>
+                <param name="use_raw" value="False"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
+                <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                <has_text_matching expression="n_bins=2"/>
+                <has_text_matching expression="random_state=1"/>
+                <has_text_matching expression="use_raw=False"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="obs" file="tl.score_genes_cell_cycle.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Regress out unwanted sources of variation, using `pp.regress_out`
+=================================================================
+
+Regress out unwanted sources of variation, using simple linear regression. This is
+inspired by Seurat's `regressOut` function in R.
+
+Several observation keys can be given, separated by commas.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.regress_out.html#scanpy.api.pp.regress_out>`__
+
+Score a set of genes, using `tl.score_genes`
+============================================
+
+The score is the average expression of a set of genes subtracted with the
+average expression of a reference set of genes. The reference set is
+randomly sampled from the `gene_pool` for each binned expression value.
+
+This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented
+for Scanpy by Davide Cittaro.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes.html#scanpy.api.tl.score_genes>`__
+
+Score cell cycle genes, using `tl.score_genes_cell_cycle`
+=========================================================
+
+Given two lists of genes associated with S phase and G2M phase, calculates
+scores and assigns a cell cycle phase (G1, S or G2M). See
+`score_genes` for more explanation.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes_cell_cycle.html#scanpy.api.tl.score_genes_cell_cycle>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>