diff impc_tool.xml @ 0:d319dc5f3ea8 draft default tip

planemo upload for repository https://github.com/INFRAFRONTIERDIB/tools-iuc/tree/query_impc/tools/query_impc commit 991881b5df5f5228ecf4445ee2cc1431b9602ea8
author iuc
date Wed, 11 Oct 2023 14:51:02 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/impc_tool.xml	Wed Oct 11 14:51:02 2023 +0000
@@ -0,0 +1,351 @@
+<tool id="query_impc" name="IMPC" version="0.9.0" profile="22.05">
+  <description>query tool</description>
+  <macros>
+   <xml name="selectSeparator">
+    <param name="sep" type="select" label="Select the separator used in the file">
+     <option value="t">tab</option>
+     <option value="s">single space</option>
+     <option value=",">Comma</option>
+     <option value=";">Semicolumn</option>
+    </param>
+   </xml>
+   <xml name="inputType">
+    <param name="inp_sel" type="select" label="Select the type of input">
+     <option value="str">Direct input</option>
+     <option value="txt">Txt file</option>
+    </param>
+   </xml>
+   <xml name="outputType">
+    <param name="g_out" type="select" label="Select the type of gene ID in the output" help="Select if the genes in the output will use MGI IDs (default option) or Symbol IDs">
+     <option value="mgi">MGI IDs</option>
+     <option value="sym">Symbol IDs</option>
+    </param>
+   </xml>
+   <xml name="header">
+    <param name="head" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Choose if include the header in the output" help="The default value is True"/>
+   </xml>
+  </macros>
+  <creator>
+   <organization name="INFRAFRONTIER GmbH" url="https://www.infrafrontier.eu/" email="info@infrafrontier.eu" />
+   <person name="Andrea Furlani" email="andrea.furlani@infrafrontier.eu" />
+   <person name="Philipp Gormanns" email="philipp.gormanns@infrafrontier.eu" />
+  </creator>
+  <requirements>
+   <requirement type="package" version="2.25.1">requests</requirement>
+   <requirement type="package" version="1.3.5">pandas</requirement>
+   <requirement type="package" version="4.9.2">lxml</requirement>
+   <requirement type="package" version="3.2.2">mygene</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+   <![CDATA[
+   python3 '$__tool_directory__/impc_tool.py' 
+   #if $query_type.selector == "7"
+    '$query_type.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.g_out'
+   #else
+    #if $query_type.inp_q.inp_sel == "str"
+     '$query_type.inp_q.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.inp_q.inp_sel'
+    #else
+     '$query_type.inp_q.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.inp_q.inp_sel' '$query_type.inp_q.sep'
+    #end if
+   #end if
+   #if $query_type.selector in ["2", "8", "9"]
+    '$query_type.g_out'
+   #end if]]>
+  </command>
+  <inputs>
+   <conditional name="query_type">
+    <param name="selector" type="select" label="Select a query">
+     <option value="1">1 - Extract all measured phenotypes related to a gene</option>
+     <option value="2">2 - Extract all genes having a particular phenotype or a set of phenotypes (e.g. relevant to a disease)</option>
+     <option value="3">3 - Extract all phenotypes which are present in a particular gene set (e.g. genes together in a pathway)</option>
+     <option value="4">4 - Extract images with a particular phenotype or a set of phenotypes</option>
+     <option value="5">5 - Which IMPReSS parameters have been measured for a particular knockout</option>
+     <option value="6">6 - Which IMPRess parameters Identified a significant finding for a particular knockout</option>
+     <option value="7">7 - Full table of genes and all Identified phenotypes, no input needed</option>
+     <option value="8">8 - Extract all genes names and ID measured in a specific IMPReSS pipeline</option>
+     <option value="9">9 - Extract all genes and corresponding phenotypes related to a particular top level phenotype category</option>
+    </param>
+    <when value="1">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene" help="Enter a single MGI gene ID or gene symbol"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="tabular,txt" label="Input file" help="Enter a txt file with the Gene MGI ID or gene symbol"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="2">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input phenotype or set of phenotypes" help="Enter a single MP/HP term ID or a list dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the MP/HP terms"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+    <when value="3">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene or set of genes" help="Enter a single MGI gene ID (or gene symbol) or a list dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the genes MGI IDs or symbols"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="4">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input phenotype or set of phenotypes" help="Enter a single MP/HP term ID or a list dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the MP/HP terms"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="5">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene" help="Enter an IMPReSS parameter ID or a list of IDs dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS parameter ID or a list of IDs"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="6">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene" help="Enter an IMPReSS parameter ID or a list of IDs dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS parameter ID or a list of IDs"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="7">
+     <param name="input" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Include genes without identified phenotypes?" help="Choose if include in the output table also those genes that have no registred phenotypes. By default they are excluded."/>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+    <when value="8">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input pipeline" help="Enter a IMPReSS pipeline ID"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS pipeline ID"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+    <when value="9">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input ID" help="Enter a top level phenotype category ID"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with a top level phenotype category ID"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+   </conditional>
+  </inputs>
+  <outputs>
+   <data format="tabular" name="output" label="${tool.name} query n° $query_type.selector"/>  
+  </outputs>
+  <tests>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="1"/>
+     <conditional name="inp_q">
+      <param name="inp_sel" value="txt"/>
+      <param name="input" value="test_query_1.txt"/>
+      <param name="sep" value="t"/>
+     </conditional>
+     <param name="head" value="True"/>
+    </conditional>
+    <output name="output" file="test_output_1_1.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="1"/>
+     <conditional name="inp_q">
+      <param name="input" value="Car4"/>
+      <param name="inp_sel" value="str"/>
+     </conditional>
+     <param name="head" value="True"/>
+   </conditional>
+   <output name="output" file="test_output_1_2.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="2"/>
+     <conditional name="inp_q">
+      <param name="input" value="test_query_2.txt"/>
+      <param name="inp_sel" value="txt"/>
+      <param name="sep" value="t"/>
+      <param name="g_out" value="mgi"/>
+     </conditional>
+     <param name="head" value="True"/>
+    </conditional>
+    <output name="output" file="test_output_2.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="3"/>
+     <conditional name="inp_q">
+      <param name="input" value="test_query_3.txt"/>
+      <param name="inp_sel" value="txt"/>
+      <param name="sep" value="t"/>
+     </conditional>
+     <param name="head" value="False"/>
+    </conditional>
+    <output name="output" value="test_output_3.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="9"/>
+     <conditional name="inp_q">
+      <param name="input" value="MP:0005388"/>
+      <param name="inp_sel" value="str"/>
+     </conditional>
+     <param name="head" value="True"/>
+     <param name="g_out" value="sym"/>
+    </conditional>
+    <output name="output" file="test_output_9.tabular"/>
+   </test>
+  </tests>
+  <help><![CDATA[
+   **What it does**
+ 
+   With this tool, it is possible to submit various types of queries to the IMPC database.
+   Select the desired query from the drop down menu. As input both MGI IDs or gene symbols are allowed (even mixed). If you want to input more than one ID, separate them with a comma without spaces (eg: MGI:104636,MGI:104637). If a mixed input is retrieved, the order after the mapping will not be maintained.
+   Note that if the mapping between the two types of IDs doesn't retrieves a result, that ID will not be included in the query input, resulting in an error if all of the IDs are not mapped. The output will be a table containing the data.
+   For the phenotypes, is possible to give as input both MP term IDs or HP terms IDs since they will be mapped to MP terms (also here the order of the input will not be maintained).
+   For both genes and phenotypes mapping, check the "View details" section of the job to check if some of them were not mapped (typo errors/ID not present in the database).
+   For queries requiring an IMPReSS pipeline ID, here_ is possible to find a complete list with details about each pipeline.
+   For query 7 no inputs are required and you can choose if including genes without identified phenotypes or not.
+   In query number 9, a top level phenotype category is required as input. On IMPC, phenotypes are divided into 20 categories to summarize wich systems are mainly influenced by the phenotype. In the database they are 24, since some of them are splitted into different groups:
+ 
+ 
+   +-----------------------------------------+---------------------------------------+
+   |    Top level phenotype category name    |    top level phenotype category ID    |
+   +=========================================+=======================================+
+   |    Immune system phenotype              |    MP:0005387                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Integument phenotype                 |    MP:0010771                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Adipose tissue phenotype             |    MP:0005375                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Hearing/vestibular/ear phenotype     |    MP:0005377                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Hematopoietic system phenotype       |    MP:0005397                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Craniofacial phenotype               |    MP:0005382                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Cardiovascular system phenotype      |    MP:0005385                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Renal/urinary system phenotype       |    MP:0005367                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Homeostasis/metabolism phenotype     |    MP:0005376                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Pigmentation phenotype               |    MP:0001186                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Limbs/digits/tail phenotype          |    MP:0005371                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Nervous system phenotype             |    MP:0003631                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Vision/eye phenotype                 |    MP:0005391                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Liver/biliary system phenotype       |    MP:0005370                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Respiratory system phenotype         |    MP:0005388                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Behavior/neurological phenotype      |    MP:0005386                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Skeleton phenotype                   |    MP:0005390                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Mortality/aging                      |    MP:0010768                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Reproductive system phenotype        |    MP:0005389                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Endocrine/exocrine gland phenotype   |    MP:0005379                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Growth/size/body region phenotype    |    MP:0005378                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Embryo phenotype                     |    MP:0005380                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Muscle phenotype                     |    MP:0005369                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Digestive/alimentary phenotype       |    MP:0005381                         |
+   +-----------------------------------------+---------------------------------------+
+ 
+   |
+   |
+ 
+   Moreover, the when the output of a query is a list of genes, the user can choose if the output will be MGI IDs or gene symbols. Please note that it is not possible to map a gene, it will had the same ID as the beggining.
+   For each query is possible to choose if include or not an header row. Note that not all tools have an option to remove it automatically. In this case the user will have to remove it using the tool "Remove beginning of a file".
+ 
+ 
+   The headers for each query are the following:
+ 
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |         Query                                                                                     |    Output header columns                                                       |
+   +===================================================================================================+================================================================================+
+   |Extract all measured phenotypes related to a gene                                                  |MP term name, MP term ID                                                        |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all genes having a particular phenotype or a set of phenotypes                             |Gene accession ID/Gene symbol, Gene name, Gene bundle url                       |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all phenotypes which are present in a particular gene set                                  |MP term ID, MP term name, genes                                                 |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract images with a particular phenotype or a set of phenotypes                                  |External sample ID, Gene symbol, Biological sample group, Sex, Colony ID,       |
+   |                                                                                                   |Zygosity, Parameter name, Download url, Thumbnail url                           |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Which IMPReSS parameters have been measured for a particular knockout                              |IMPReSS Parameter name                                                          |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Which IMPRess parameters identified a significant finding for a particular knockout                |IMPReSS Parameter name, p-value                                                 |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Full table of genes and all identified phenotypes                                                  |Gene accession ID/Gene symbol, Identified phenotypes                            |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all genes names and ID measured in a specific IMPReSS pipeline                             |Gene accession ID/Gene symbol, Gene name                                        |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all genes and corresponding phenotypes related to a particular top level phenotype category|Gene accession ID/Gene symbol, Significant mp term ID, Significant mp term name |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+ 
+   .. _here: https://www.mousephenotype.org/impress/pipelines
+  ]]></help>
+  <citations>
+   <citation type="doi">https://doi.org/10.1093/nar/gku1193</citation>
+   <citation type="doi">https://doi.org/10.12688/f1000research.25369.1</citation>
+   <citation type="doi">https://doi.org/10.1038/nature19356</citation>
+  </citations>
+ </tool>
\ No newline at end of file