diff gemini_db_info.xml @ 5:bc8b01d1b496 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
author iuc
date Fri, 11 Jan 2019 17:37:04 -0500
parents f3cc64057b4e
children c2a413e47fe9
line wrap: on
line diff
--- a/gemini_db_info.xml	Fri Dec 14 12:41:01 2018 -0500
+++ b/gemini_db_info.xml	Fri Jan 11 17:37:04 2019 -0500
@@ -1,21 +1,47 @@
-<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1">
-    <description>List the gemini database tables and columns</description>
+<tool id="gemini_db_info" name="GEMINI database info" version="@VERSION@">
+    <description>Retrieve information about tables, columns and annotation data stored in a GEMINI database</description>
     <macros>
         <import>gemini_macros.xml</import>
-        <token name="@BINARY@">db_info</token>
     </macros>
     <expand macro="requirements" />
     <expand macro="stdio" />
     <expand macro="version_command" />
     <command>
 <![CDATA[
-        gemini @BINARY@
-            "${ infile }" | tr -s ' ' '\t'
-            > "${ outfile }"
+        #if str($select.info_type) == 'scheme':
+            gemini db_info '$infile' | tr -s ' ' '\t'
+        #elif str($select.info_type) == 'resources':
+            gemini query --header -q "SELECT name as data_source, resource as resource_file from resources" '$infile'
+        #elif str($select.info_type) == 'gene_detailed':
+            gemini query --header -q "SELECT gene, synonym as synonyms, transcript FROM gene_detailed WHERE gene is not NULL GROUP BY transcript ORDER BY gene" '$infile'
+        #else:
+            gemini query --header -q "SELECT * FROM samples" "$infile" | cut -f 2-${select.format}
+        #end if
+
+        > '$outfile'
 ]]>
     </command>
     <inputs>
         <expand macro="infile" />
+        <conditional name="select">
+            <param name="info_type" type="select"
+            label="Information to retrieve from the database">
+                <option value="scheme">Names of database tables and their columns</option>
+                <option value="resources">List of annotation data sources</option>
+                <option value="gene_detailed">List of genes and their transcripts</option>
+                <option value="samples">Write out sample information in PED format</option>
+            </param>
+            <when value="scheme" />
+            <when value="resources" />
+            <when value="gene_detailed" />
+            <when value="samples">
+                <param name="format" type="select" display="radio"
+                label="Format of the pedigree file">
+                    <option value="">Allow extra columns to accommodate custom sample info</option>
+                    <option value="7">Restrict to 6 PED standard columns and drop additional info</option>
+                </param>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data name="outfile" format="tabular" />
@@ -23,67 +49,71 @@
     <tests>
         <test>
             <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" />
+            <conditional name="select">
+                <param name="info_type" value="scheme" />
+            </conditional>
             <output name="outfile">
                 <assert_contents>
                     <has_line_matching expression="table_name&#009;column_name&#009;type.*" />
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" />
+            <conditional name="select">
+                <param name="info_type" value="resources" />
+            </conditional>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line line="data_source&#009;resource_file" />
+                    <has_n_columns n="2" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" />
+            <conditional name="select">
+                <param name="info_type" value="gene_detailed" />
+            </conditional>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line line="gene&#009;synonyms&#009;transcript" />
+                    <has_n_columns n="3" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" />
+            <conditional name="select">
+                <param name="info_type" value="samples" />
+            </conditional>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="family_id&#009;name&#009;paternal_id&#009;maternal_id&#009;sex&#009;phenotype&#009;.+" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" />
+            <conditional name="select">
+                <param name="info_type" value="samples" />
+                <param name="format" value="7" />
+            </conditional>
+            <output name="outfile">
+                <assert_contents>
+                    <has_line line="family_id&#009;name&#009;paternal_id&#009;maternal_id&#009;sex&#009;phenotype" />
+                    <has_n_columns n="6" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 **What it does**
 
-Because of the sheer number of annotations that are stored in gemini, there are admittedly too many columns to remember by rote.
-If you can’t recall the name of particular column, just use the db_info tool.
-It will report all of the tables and all of the columns / types in each table::
+Because of the sheer number of annotations that are stored in GEMINI, it is easy to lose track of what is actually available through database queries.
 
- table_name          column_name                   type
- variants            chrom                         text
- variants            start                         integer
- variants            end                           integer
- variants            variant_id                    integer
- variants            anno_id                       integer
- variants            ref                           text
- variants            alt                           text
- variants            qual                          float
- variants            filter                        text
- variants            type                          text
- variants            sub_type                      text
- variants            gts                           blob
- variants            gt_types                      blob
- variants            gt_phases                     blob
- variants            gt_depths                     blob
- variants            call_rate                     float
- variants            in_dbsnp                      bool
- variants            rs_ids                        text
- variants            in_omim                       bool
- variants            clin_sigs                     text
- variants            cyto_band                     text
- variants            rmsk                          text
- variants            in_cpg_island                 bool
- variants            in_segdup                     bool
- variants            is_conserved                  bool
- variants            num_hom_ref                   integer
- variants            num_het                       integer
- variants            num_hom_alt                   integer
- variants            num_unknown                   integer
- variants            aaf                           float
- variants            hwe                           float
- variants            inbreeding_coeff              float
- variants            pi                            float
- variants            recomb_rate                   float
- variants            gene                          text
- variants            transcript                    text
- variants            is_exonic                     bool
- variants            is_coding                     bool
- variants            is_lof                        bool
- variants            exon                          text
- variants            codon_change                  text
- variants            aa_change                     text
- ...                 ...                           ... 
-
-For more columns see https://gemini.readthedocs.org/en/latest/content/tools.html#db-info-list-the-gemini-database-tables-and-columns
-
+This tool offers a quick and simple way to inspect the structure of a GEMINI
+database, the origin of its annotations and some of the actual annotation content.
     ]]></help>
     <expand macro="citations"/>
 </tool>