Mercurial > repos > iuc > episcanpy_preprocess

--- a/macros.xml	Tue Apr 18 13:18:50 2023 +0000
+++ b/macros.xml	Sat Apr 22 12:13:44 2023 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">0.3.2</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">21.01</token>
     <xml name="requirements">
         <requirements>
--- a/preprocess.xml	Tue Apr 18 13:18:50 2023 +0000
+++ b/preprocess.xml	Sat Apr 22 12:13:44 2023 +0000
@@ -102,6 +102,16 @@
     save='plot.png'
 )

+#else if $method.method == 'tl.find_genes'
+esc.tl.find_genes(
+    adata,
+    gtf_file='$method.find_genes_gtf_file',
+    key_added='$method.find_genes_key_added',
+    upstream=$method.find_genes_upstream,
+    feature_type='$method.find_genes_feature_type',
+    annotation='$method.find_genes_annotation',
+    raw=$method.find_genes_raw)
+
 #end if
 @CMD_anndata_write_outputs@
 ]]></configfile>
@@ -119,6 +129,7 @@
                 <option value="pp.select_var_feature">Select the most variable features, 'pp.select_var_feature'</option>
                 <option value="pp.cal_var">Show distribution plots of cells sharing features and variability score 'pp.cal_var'</option>
                 <option value="pp.variability_features">Computes variability score to rank the most variable features across all cells, using 'pp.variability_features'</option>
+                <option value="tl.find_genes">Embedding: Find genes and add annotations, using 'tl.find_genes'</option>
             </param>
             <when value="pp.binarize" />
             <when value="pp.filter_cells">
@@ -186,13 +197,57 @@
                     <option value="log10">Log10</option>
                 </param>
             </when>
+            <when value="tl.find_genes"><!-- Inputs passed to esc.tl.find_genes; text validators are anchored so the whole value must match -->
+                <param name="find_genes_gtf_file" type="data" format="gtf" label="Annotation GTF file" help="(gtf_file)"/>
+                <param name="find_genes_key_added" value="transcript_annotation" type="text" label="Key added" help="(key_added)">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.letters,string.digits">
+                            <add value="_" />
+                            <add value="-" />
+                            <add value="." />
+                            <add value=" " />
+                            <add value="," />
+                        </valid>
+                    </sanitizer>
+                    <validator type="regex" message="Only letters, digits, spaces and the characters _ . , - are allowed">^[0-9a-zA-Z_., -]+$</validator>
+                </param>
+                <param name="find_genes_upstream" value="2000" min="0" type="integer" label="Upstream" help="(upstream)"/>
+                <param name="find_genes_feature_type" value="transcript" type="text" label="Feature type" help="(feature_type)">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.letters,string.digits">
+                            <add value="_" />
+                            <add value="-" />
+                            <add value="." />
+                            <add value=" " />
+                            <add value="," />
+                        </valid>
+                    </sanitizer>
+                    <validator type="regex" message="Only letters, digits, spaces and the characters _ . , - are allowed">^[0-9a-zA-Z_., -]+$</validator>
+                </param>
+                <param name="find_genes_annotation" value="HAVANA" type="text" label="Annotation" help="(annotation)">
+                    <sanitizer invalid_char="">
+                        <valid initial="string.letters,string.digits">
+                            <add value="_" />
+                            <add value="-" />
+                            <add value="." />
+                            <add value=" " />
+                            <add value="," />
+                        </valid>
+                    </sanitizer>
+                    <validator type="regex" message="Only letters, digits, spaces and the characters _ . , - are allowed">^[0-9a-zA-Z_., -]+$</validator>
+                </param>
+                <param name="find_genes_raw" type="select" label="Raw?" help="(raw)">
+                    <option value="True">True</option>
+                    <option value="False" selected="true">False</option>
+                </param>
+            </when>
         </conditional>
         <expand macro="inputs_common_advanced"/>
     </inputs>
     <outputs>
         <expand macro="anndata_outputs"/>
-        <data name="out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) on ${on_string}">
-            <filter>method['method'] != 'pp.binarize' and method['method'] != 'pp.filter_cells' and method['method'] != 'pp.filter_features' and method['method'] != 'nb_feat_log' and method['method'] != 'select_var_feature'</filter>
+        <data name="out_png" format="png" label="${tool.name} (${method.method}) plot on ${on_string}" from_work_dir="plot.png">
+            <filter>method['method'] not in ('pp.binarize', 'pp.filter_cells', 'pp.filter_features', 'nb_feat_log', 'pp.select_var_feature', 'tl.find_genes')</filter>
         </data>
     </outputs>
     <tests>
@@ -263,7 +318,7 @@
                 <has_text_matching expression="nb_features"/>
             </assert_stdout>
         </test>
-        <test  expect_num_outputs="3">
+        <test  expect_num_outputs="2">
             <!-- pp.select_var_feature -->
             <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
@@ -372,6 +427,27 @@
             <output name="anndata_out" file="krumsiek11.pp.variability_features.h5ad" ftype="h5ad" compare="sim_size"/>
             <output name="out_png" file="krumsiek11.pp.variability_features.png" ftype="png" compare="sim_size"/>
         </test>
+        <test expect_num_outputs="2">
+            <!-- tl.find_genes: annotate the chrY AnnData from chrY.gtf and check that the 'var' key is present -->
+            <param name="adata" value="chrY.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.find_genes"/>
+                <param name="find_genes_gtf_file" value="chrY.gtf"/>
+                <param name="find_genes_key_added" value="transcript_annotation"/>
+                <param name="find_genes_upstream" value="2000"/>
+                <param name="find_genes_feature_type" value="transcript"/>
+                <param name="find_genes_annotation" value="HAVANA"/>
+                <param name="find_genes_raw" value="False"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="anndata_out" file="chrY_with_transcript_annotation.h5ad" ftype="h5ad" compare="sim_size">
+                <assert_contents>
+                    <has_h5_keys keys="var" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[

@@ -426,7 +502,6 @@

 Distribution of cells sharing features and variability score (`pp.cal_var`)
 =============================================================================
-
 Show distribution plots of cells sharing features and variability score.

 More details on the `episcanpy documentation
@@ -434,8 +509,11 @@

 Compute a variability score to rank the most variable features across all cells (`pp.variability_features`)
 ============================================================================================================
+This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score).

-This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score).
+Find and add gene annotations (`tl.find_genes`)
+========================================================================================
+This function adds gene annotations to an AnnData (h5ad) file from a GTF annotation file (.annotation.gtf).

 More details on the `episcanpy documentation
 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.variability_features.html>`__
Binary file test-data/chrY.h5ad has changed
Binary file test-data/chrY_norm_peaks.h5ad has changed
Binary file test-data/chrY_with_transcript_annotation.h5ad has changed