changeset 20:d86cb2ce311b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 55ba4cd74d5d8f7baff164b1864c36759d1c7fd9
author iuc
date Fri, 18 Oct 2024 10:39:04 +0000
parents 4338bf96809e
children
files inspect.xml macros.xml
diffstat 2 files changed, 194 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/inspect.xml	Thu Oct 03 22:44:00 2024 +0000
+++ b/inspect.xml	Fri Oct 18 10:39:04 2024 +0000
@@ -221,6 +221,27 @@
     #end if
     copy=False)
 
+    #if str($method.get_df.get_df_select) == 'True':
+cluster_DEG = sc.get.rank_genes_groups_df(adata,
+                                            group=None, # return all groups
+                                            #if str($method.get_df.key) != '':
+                                            key='$method.get_df.key',
+                                            #end if
+                                            #if str($method.get_df.pval_cutoff) != '':
+                                            pval_cutoff=$method.get_df.pval_cutoff,
+                                            #end if
+                                            #if str($method.get_df.log2fc_min) != '':
+                                            log2fc_min=$method.get_df.log2fc_min,
+                                            #end if
+                                            #if str($method.get_df.log2fc_max) != '':
+                                            log2fc_max=$method.get_df.log2fc_max,
+                                            #end if
+                                            @CMD_PARAM_GENE_SYMBOLS@                                            
+                                            )
+
+cluster_DEG.to_csv('DEG.tsv', sep="\t", index=False)
+    #end if
+    
 #else if str($method.method) == "tl.marker_gene_overlap":
 reference_markers = {}
 #for $i, $s in enumerate($method.reference_markers)
@@ -292,10 +313,10 @@
 sc.pp.sqrt(
     adata,
     copy=False)
+
+print("stats after sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())    
 #end if
 
-print("stats after sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
-
 @CMD_ANNDATA_WRITE_OUTPUTS@
         ]]>
         </configfile>
@@ -392,6 +413,20 @@
                 <expand macro="params_score_genes"/>
             </when>
             <when value="tl.rank_genes_groups">
+                <conditional name="get_df">
+                    <param name="get_df_select" type="select" label="Get ranked genes as a Tabular file?">
+                        <option value="False" selected="true">False</option>
+                        <option value="True">True</option>
+                    </param>
+                    <when value="False"/>
+                    <when value="True">
+                        <param argument="key" type="text" value="" optional="true" label="Key differential expression groups were stored under"/>
+                        <param argument="pval_cutoff" type="float" min="0.0" value="" optional="true" label="Return only adjusted p-values below the cutoff."/>
+                        <param argument="log2fc_min" type="float" value="" optional="true" label="Minimum logfc to return."/>
+                        <param argument="log2fc_max" type="float" value="" optional="true" label="Maximum logfc to return."/>
+                        <expand macro="param_gene_symbols" label="Column name in [.var] DataFrame that stores gene symbols."/>
+                    </when>
+                </conditional>
                 <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider">
                     <expand macro="sanitize_query"/>
                 </param>
@@ -558,6 +593,9 @@
     </inputs>
     <outputs>
         <expand macro="anndata_outputs"/>
+        <data name="ranked_gene" format="tabular" from_work_dir="DEG.tsv" label="${tool.name} (${method.method}) on ${on_string}: Ranked genes">
+            <filter> method['method'] == 'tl.rank_genes_groups' and method['get_df']['get_df_select'] == 'True'</filter>
+        </data>
     </outputs>
     <tests>
 <!-- test 1 -->
@@ -1028,6 +1066,157 @@
                 </assert_contents>
             </output>
         </test>
+
+        <!-- test 16: tl.rank_genes_groups (t-test_overestim_var) with ranked genes exported as a tabular file -->
+        <test expect_num_outputs="3">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <conditional name="get_df">
+                    <param name="get_df_select" value="True"/>
+                </conditional>
+                <param name="groupby" value="cell_type"/>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="t-test_overestim_var"/>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                    <has_text_matching expression="groupby='cell_type'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                    <has_text_matching expression="n_genes=100"/>
+                    <has_text_matching expression="method='t-test_overestim_var'"/>
+                    <has_text_matching expression="corr_method='benjamini-hochberg'"/>
+                    <has_text_matching expression="sc.get.rank_genes_groups_df"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+            <output name="ranked_gene" ftype="tabular">
+                <assert_contents>
+                    <has_line_matching expression="group\tnames\tscores\tlogfoldchanges\tpvals\tpvals_adj"/>
+                    <has_line_matching expression="Ery\tEKLF\t39.086777\t4.8413053\t1.7995717323073084e-66\t1.979528905538039e-65"/>
+                </assert_contents>
+            </output>
+        </test>
+
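+        <!-- test 17: tl.rank_genes_groups (logreg, group Ery vs reference Mk) with ranked genes exported as a tabular file -->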
+        <test expect_num_outputs="3">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <conditional name="get_df">
+                    <param name="get_df_select" value="True"/>
+                </conditional>
+                <param name="groupby" value="cell_type"/>
+                <param name="groups" value="Ery"/>
+                <conditional name="ref">
+                    <param name="rest" value="group_id"/>
+                    <param name="reference" value="Mk"/>
+                </conditional>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="logreg"/>
+                    <conditional name="solver">
+                        <param name="solver" value="liblinear"/>
+                        <conditional name="penalty">
+                            <param name="penalty" value="l2"/>
+                            <param name="random_state" value="1"/>
+                        </conditional>
+                    </conditional>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                    <has_text_matching expression="groupby='cell_type'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="n_genes=100"/>
+                    <has_text_matching expression="method='logreg'"/>
+                    <has_text_matching expression="solver='liblinear'"/>
+                    <has_text_matching expression="penalty='l2'"/>
+                    <has_text_matching expression="dual=False"/>
+                    <has_text_matching expression="fit_intercept=True"/>
+                    <has_text_matching expression="intercept_scaling=1.0"/>
+                    <has_text_matching expression="tol=0.0001"/>
+                    <has_text_matching expression="C=1.0"/>
+                    <has_text_matching expression="groups=\['Ery'\]"/>
+                    <has_text_matching expression="reference='Mk'"/>
+                    <has_text_matching expression="sc.get.rank_genes_groups_df"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+            <output name="ranked_gene" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="names\tscores"/>
+                    <has_text_matching expression="Fli1\t3.5328505"/>
+                </assert_contents>
+            </output>
+        </test>
+
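+        <!-- test 18: tl.rank_genes_groups (t-test_overestim_var) with the exported table filtered by pval_cutoff, log2fc_min and log2fc_max -->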
+        <test expect_num_outputs="3">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <conditional name="get_df">
+                    <param name="get_df_select" value="True"/>
+                    <param name="pval_cutoff" value="0.05"/>
+                    <param name="log2fc_min" value="1"/>
+                    <param name="log2fc_max" value="3"/>
+                </conditional>
+                <param name="groupby" value="cell_type"/>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="t-test_overestim_var"/>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                    <has_text_matching expression="groupby='cell_type'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                    <has_text_matching expression="n_genes=100"/>
+                    <has_text_matching expression="method='t-test_overestim_var'"/>
+                    <has_text_matching expression="corr_method='benjamini-hochberg'"/>
+                    <has_text_matching expression="sc.get.rank_genes_groups_df"/>
+                    <has_text_matching expression="pval_cutoff=0.05"/>
+                    <has_text_matching expression="log2fc_min=1"/>
+                    <has_text_matching expression="log2fc_max=3"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+            <output name="ranked_gene" ftype="tabular">
+                <assert_contents>
+                    <has_line_matching expression="group\tnames\tscores\tlogfoldchanges\tpvals\tpvals_adj"/>
+                    <has_line_matching expression="Ery\tFog1\t21.071571\t2.8023682\t5.701001345880348e-35\t3.135550740234191e-34"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 Calculate quality control metrics, using `pp.calculate_qc_metrics`
--- a/macros.xml	Thu Oct 03 22:44:00 2024 +0000
+++ b/macros.xml	Fri Oct 18 10:39:04 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.10.2</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <token name="@PROFILE@">21.09</token>
     <xml name="requirements">
         <requirements>
@@ -1289,8 +1289,8 @@
     ]]>
     </token>
 
-    <xml name="param_gene_symbols">
-        <param argument="gene_symbols" type="text" value="" optional="true" label="Key for field in '.var' that stores gene symbols" help="By default 'var_names' refer to the index column of the '.var' DataFrame">
+    <xml name="param_gene_symbols" token_label="Key for field in '.var' that stores gene symbols" token_help="By default 'var_names' refer to the index column of the '.var' DataFrame">
+        <param argument="gene_symbols" type="text" value="" optional="true" label="@LABEL@" help="@HELP@">
             <expand macro="sanitize_query"/>
         </param>
     </xml>