1
|
1 <tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
|
|
<!-- Short text shown after the tool name in the tool panel. -->
<description>from an Input wide dataset</description>
<!-- Shared definitions are imported from macros.xml (not visible in this file). -->
<macros>
    <import>macros.xml</import>
</macros>
<!-- Expands the imported macro named "requirements" in place. -->
<expand macro="requirements"/>
|
|
<!-- Error reporting: an exit status of exactly 1 is classified as fatal and
     surfaced to the user with the description "Repeated Unique IDs". -->
<stdio>
    <exit_code range="1"
               level="fatal"
               description="Repeated Unique IDs"/>
</stdio>
|
|
<!-- Builds the split_wide_dataset.py command line (Cheetah template).
     * -i            : the input wide dataset
     * -id / -p2     : passed only when the user states the dataset already has a
                       unique FeatureID column (-p2 only when those IDs are numeric)
     * -p            : passed instead when no unique FeatureID column exists
     * -s            : the user-entered sample column numbers
     * -w / -d / -a  : output paths for the wide, design and annotation datasets
     Every interpolated value is single-quoted so that user-entered text containing
     whitespace (e.g. a column name such as "Feature ID") reaches the script as a
     single argument instead of being word-split by the shell. -->
<command detect_errors="exit_code"><![CDATA[
    split_wide_dataset.py
        -i='$input'
        ## Branch on whether the dataset already carries a unique FeatureID column.
        #if $cond_UniqID.hasUniqID == "y":
            -id='$cond_UniqID.uniqID'
            ## A prefix is only requested when the existing IDs are purely numeric.
            #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y":
                -p2='$cond_UniqID.only_numbers.prefix2'
            #end if
        #else:
            -p='$cond_UniqID.prefix'
        #end if
        -s='$samples'
        -w='$wide'
        -d='$design'
        -a='$annot'
]]></command>
|
|
<!-- User-facing form.
     cond_UniqID switches between two modes:
       y : the dataset has a unique FeatureID column (its name is entered in uniqID);
           the nested only_numbers conditional optionally collects a prefix (prefix2)
           when those IDs are purely numeric.
       n : no such column exists; an optional prefix for generated IDs is collected.
     samples lists the 1-based column numbers holding sample data. -->
<inputs>
    <!-- NOTE(review): the help text below refers to a "TIP below", but the help
         section of this file contains no such tip. Confirm and add or reword. -->
    <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/>
    <conditional name="cond_UniqID">
        <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?">
            <option value="y">Yes</option>
            <option value="n">No</option>
        </param>
        <when value="y">
            <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs."/>
            <conditional name="only_numbers">
                <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?">
                    <option value="y">Yes</option>
                    <option value="n">No</option>
                </param>
                <when value="y">
                    <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
                </when>
                <!-- Explicit empty case: every select option needs a matching <when>;
                     non-numeric IDs require no further input. -->
                <when value="n"/>
            </conditional>
        </when>
        <when value="n">
            <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you chose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/>
        </when>
    </conditional>
    <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. "/>
</inputs>
|
|
<!-- Three tabular outputs, one per file written by the command
     (wide = -w, design = -d, annot = -a). -->
<outputs>
    <data name="wide"   format="tabular" label="${tool.name} on ${on_string}: Wide Dataset"/>
    <data name="design" format="tabular" label="${tool.name} on ${on_string}: Design Dataset"/>
    <data name="annot"  format="tabular" label="${tool.name} on ${on_string}: Annotation Dataset"/>
</outputs>
|
|
<!-- Functional tests.
     Both tests supply "prefix", which only exists in the hasUniqID == "n" branch of
     cond_UniqID. The branch is therefore selected explicitly; without it the
     selector falls back to its first option ("y") and the prefix is not applied. -->
<tests>
    <!-- Gene expression example: generated FeatureIDs use the prefix "Gene". -->
    <test>
        <param name="input" value="gene_input_dataset_01fhl.tsv"/>
        <param name="hasUniqID" value="n"/>
        <param name="prefix" value="Gene"/>
        <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
        <output name="wide" file="gene_wide_dataset_01fhl.tsv"/>
        <output name="design" file="gene_design_file_01fhl.tsv"/>
        <output name="annot" file="gene_annot_file_01fhl.tsv"/>
    </test>
    <!-- Metabolite example: generated FeatureIDs use the prefix "Met". -->
    <test>
        <param name="input" value="metabolite_input_dataset_01fhl.tsv"/>
        <param name="hasUniqID" value="n"/>
        <param name="prefix" value="Met"/>
        <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
        <output name="wide" file="metabolite_wide_dataset_01fhl.tsv"/>
        <output name="design" file="met_design_file_01fhl.tsv"/>
        <output name="annot" file="met_annot_file_01fhl.tsv"/>
    </test>
</tests>
|
|
74 <help><![CDATA[
|
|
75
|
|
76 **Tool Description**
|
|
77
|
|
This tool can be used to perform two tasks: 1) convert a single file that contains
data and annotation in wide format into two files in wide format, one with data and
one with annotation; and 2) create a design file template that will be compatible with
the wide data and annotation files. The user indicates whether the input contains a
column of unique feature identifiers (FeatureIDs); if it does not, the tool will
generate one. The user can specify a prefix for the unique FeatureID
(e.g. 'met' for metabolite data). The Design Dataset is a template with a single
column called 'SampleID' whose entries exactly match the names of the sample columns
in the input Wide Dataset. This Design Dataset can be modified by
the user to include metadata columns. The tool also creates a separate Annotation
Dataset containing the unique FeatureIDs (user-specified or generated by the tool)
and any non-sample descriptor columns that were present in the input Wide Dataset
(such as m/z ratio, retention time, compound name, etc.). Finally, the tool creates
a 'clean' Wide Dataset containing only samples in columns and features in rows.
|
|
92
|
|
93 --------------------------------------------------------------------------------
|
|
94
|
|
95 **INPUT**
|
|
96
|
|
97 **Example - Wide Format Input Dataset**
|
|
98
|
|
+---------+-----------+---------+---------+-----+
| rowID   | m/z ratio | sample1 | sample2 | ... |
+=========+===========+=========+=========+=====+
| 1       | 8.845     | 20      | 10      | ... |
+---------+-----------+---------+---------+-----+
| 2       | 0.258     | 22      | 30      | ... |
+---------+-----------+---------+---------+-----+
| 3       | 10.54     | 27      | 2       | ... |
+---------+-----------+---------+---------+-----+
| 4       | 8.594     | 17      | 8       | ... |
+---------+-----------+---------+---------+-----+
| ...     | ...       | ...     | ...     | ... |
+---------+-----------+---------+---------+-----+
|
|
112
|
|
113 **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File.
|
|
114
|
|
115 **Unique FeatureID**
|
|
116
|
|
117 If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one.
|
|
118
|
|
119 **Prefix**
|
|
120
|
|
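The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: if *met* is entered as the prefix, the unique FeatureID column will consist of met\_ followed by a number. If the Input Dataset already has a unique FeatureID column that contains only numbers, a prefix can likewise be entered; it is prepended to each numeric FeatureID with an underbar in between.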
|
|
122
|
|
123 **Sample Columns**
|
|
124
|
|
Numbers (1-based, comma-separated, no spaces) of the columns in the Input Dataset that contain sample information, e.g. 2,3,4. All columns not specified as samples will be used to populate the Annotation File.
|
|
126
|
|
127 --------------------------------------------------------------------------------
|
|
128
|
|
129 **OUTPUT**
|
|
130
|
|
131
|
|
132 **A Wide Dataset containing the FeatureID column and all columns selected as samples**
|
|
133
|
|
+------------+---------+---------+---------+-----+
| FeatureID  | sample1 | sample2 | sample3 | ... |
+============+=========+=========+=========+=====+
| met_1      | 10      | 20      | 10      | ... |
+------------+---------+---------+---------+-----+
| met_2      | 5       | 22      | 30      | ... |
+------------+---------+---------+---------+-----+
| met_3      | 30      | 27      | 2       | ... |
+------------+---------+---------+---------+-----+
| met_4      | 32      | 17      | 8       | ... |
+------------+---------+---------+---------+-----+
| ...        | ...     | ...     | ...     | ... |
+------------+---------+---------+---------+-----+
|
|
147
|
|
148 In the above example, *met* was input for Prefix
|
|
149
|
|
150
|
|
**A Design Dataset template containing a column called SampleID with the column headers from the input dataset that were chosen as samples**
|
|
152
|
|
+----------+---------+
| SampleID |         |
+==========+=========+
| sample1  |         |
+----------+---------+
| sample2  |         |
+----------+---------+
| sample3  |         |
+----------+---------+
| sample4  |         |
+----------+---------+
| ...      |         |
+----------+---------+
|
|
166
|
|
167
|
|
168 **An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns**
|
|
169
|
|
+-------------+------------+-----+
| FeatureID   | m/z ratio  | ... |
+=============+============+=====+
| FeatureID_1 | 8.845      | ... |
+-------------+------------+-----+
| FeatureID_2 | 0.258      | ... |
+-------------+------------+-----+
| FeatureID_3 | 10.54      | ... |
+-------------+------------+-----+
| FeatureID_4 | 8.594      | ... |
+-------------+------------+-----+
| ...         | ...        | ... |
+-------------+------------+-----+
|
|
183
|
|
184
|
|
185 ]]>
|
|
186 </help>
|
|
<!-- References shown with the tool, given as raw BibTeX.
     BibTeX requires individual authors to be separated by the word "and";
     a comma-separated list is parsed as a single malformed name.
     NOTE(review): the first entry still lists the year as "in press"; confirm the
     final publication details and update the entry. -->
<citations>
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {in press}
    }</citation>
    <citation type="bibtex">@article{garcia2010paintomics,
    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={Bioinformatics},
    volume={27},
    number={1},
    pages={137--139},
    year={2010},
    publisher={Oxford University Press}
    }</citation>
</citations>
|
|
205 </tool>
|