diff map_ensembl_transcripts.xml @ 21:d6b961721037

Miller Lab Devshed version 4c04e35b18f6
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 05 Nov 2012 12:44:17 -0500
parents 8ae67e9fb6ff
children
line wrap: on
line diff
--- a/map_ensembl_transcripts.xml	Tue Oct 23 14:38:04 2012 -0400
+++ b/map_ensembl_transcripts.xml	Mon Nov 05 12:44:17 2012 -0500
@@ -11,8 +11,10 @@
   </command>
 
   <inputs>
-    <param name="input" type="data" format="tabular" label="Table" />
-    <param name="ensembl_col" type="data_column" data_ref="input" label="Column with ENSEMBL transcript code" />
+    <param name="input" type="data" format="tabular" label="Dataset" >
+       <validator type="unspecified_build" message="This dataset does not have a database/build and cannot be used with this tool" />
+    </param>
+    <param name="ensembl_col" type="data_column" data_ref="input" label="Column with ENSEMBL transcript ID" />
   </inputs>
 
   <outputs>
@@ -34,9 +36,46 @@
 
   <help>
 
+**Dataset formats**
+
+The input and output datasets are in tabular_ format.
+The input dataset must have a column of ENSEMBL transcript IDs and have
+the database/build set.  Even though positions are not needed, the correct
+database/build must be given to look up the pathways.
+The output dataset adds columns for the KEGG gene ID and KEGG pathways.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
 **What it does**
 
-Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes.
+Adds the fields "KEGG gene ID" and "KEGG pathways" to an input table of ENSEMBL
+transcript IDs.  A "U" in the KEGG gene ID field indicates that the
+tool cannot link the ENSEMBL transcript ID to a KEGG gene ID.
+An "N" in the KEGG pathways field means the KEGG pathway is unknown.
+
+-----
+
+**Example**
+
+- input::
 
+   ENSCAFT00000000001
+   ENSCAFT00000000144
+   ENSCAFT00000000160
+   ENSCAFT00000000215
+   etc.
+
+- output::
+
+   ENSCAFT00000000001      476153  cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+   ENSCAFT00000000144      483960  N
+   ENSCAFT00000000160      610160  N
+   ENSCAFT00000000215      U       N
+   etc.
+ 
   </help>
 </tool>