comparison split_wide_dataset.xml @ 1:ec9ee8edb84d draft

Initial upload of 21.6.10 release.
author malex
date Fri, 18 Jun 2021 20:23:19 +0000
parents
children 2c218a253d56
comparison
equal deleted inserted replaced
0:864fc6430432 1:ec9ee8edb84d
1 <tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
2 <description>from an Input wide dataset</description> <!-- Continues the tool name given on the tool tag. -->
3 <macros>
4 <import>macros.xml</import> <!-- Shared macro file; presumably defines the "requirements" macro and the @WRAPPER_VERSION@ token used by this tool (not visible here; confirm). -->
5 </macros>
6 <expand macro="requirements" />
7 <stdio>
8 <exit_code range="1" level="fatal" description="Repeated Unique IDs"/> <!-- Declares exit code 1 as a fatal error with this description. NOTE(review): the command tag also sets detect_errors="exit_code"; confirm which of the two Galaxy honours. -->
9 </stdio>
10 <!-- Builds the split_wide_dataset.py call. Every substituted value is single-quoted so that user-entered text (column name, prefixes, sample list) and dataset paths reach the script as one argument each. --> <command detect_errors="exit_code"><![CDATA[
11 split_wide_dataset.py
12 -i='$input'
13 #if $cond_UniqID.hasUniqID == "y":
14 -id='$cond_UniqID.uniqID'
15 #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y":
16 -p2='$cond_UniqID.only_numbers.prefix2'
17 #end if
18 #else:
19 -p='$cond_UniqID.prefix'
20 #end if
21 -s='$samples'
22 -w='$wide'
23 -d='$design'
24 -a='$annot'
25 ]]></command>
26 <inputs>
27 <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/>
28 <conditional name="cond_UniqID"> <!-- Selects between an existing FeatureID column (y) and tool-generated IDs (n). -->
29 <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?">
30 <option value="y">Yes</option>
31 <option value="n">No</option>
32 </param>
33 <when value="y">
34 <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs."/>
35 <conditional name="only_numbers"> <!-- Only the "y" case adds a parameter (prefix2); the "n" case is declared empty so every option has a matching when. -->
36 <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?">
37 <option value="y">Yes</option>
38 <option value="n">No</option>
39 </param>
40 <when value="y">
41 <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
42 </when> <when value="n"/>
43 </conditional>
44 </when>
45 <when value="n">
46 <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you chose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/>
47 </when>
48 </conditional>
49 <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. "/>
50 </inputs>
51 <outputs> <!-- Three tabular result datasets; each name is the variable handed to the script in the command section (wide, design, annot). -->
52 <data name="wide" format="tabular" label="${tool.name} on ${on_string}: Wide Dataset"/>
53 <data name="design" format="tabular" label="${tool.name} on ${on_string}: Design Dataset"/>
54 <data name="annot" format="tabular" label="${tool.name} on ${on_string}: Annotation Dataset"/>
55 </outputs>
56 <tests> <!-- Both tests exercise the generated-ID path: hasUniqID is set to "n" explicitly because the select would otherwise fall back to its first option ("y"), where the prefix parameter does not exist. -->
57 <test>
58 <param name="input" value="gene_input_dataset_01fhl.tsv"/>
59 <conditional name="cond_UniqID"><param name="hasUniqID" value="n"/><param name="prefix" value="Gene"/></conditional>
60 <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
61 <output name="wide" file="gene_wide_dataset_01fhl.tsv"/>
62 <output name="design" file="gene_design_file_01fhl.tsv"/>
63 <output name="annot" file="gene_annot_file_01fhl.tsv"/>
64 </test>
65 <test>
66 <param name="input" value="metabolite_input_dataset_01fhl.tsv"/>
67 <conditional name="cond_UniqID"><param name="hasUniqID" value="n"/><param name="prefix" value="Met"/></conditional>
68 <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
69 <output name="wide" file="metabolite_wide_dataset_01fhl.tsv"/>
70 <output name="design" file="met_design_file_01fhl.tsv"/>
71 <output name="annot" file="met_annot_file_01fhl.tsv"/>
72 </test>
73 </tests>
74 <help><![CDATA[
75
76 **Tool Description**
77
78 This tool can be used to perform two tasks: 1) convert a single file that contains
79 data and annotation in wide format to two files in wide format, one with data and
80 one with annotation; and 2) create a design file template that will be compatible with
81 the wide data and annotation files. The tool will automatically check for a column
82 containing unique feature identifiers (FeatureIDs). If no unique FeatureID is located,
83 the tool will generate one. The user can specify a prefix for the unique FeatureID
84 (e.g. 'met' for metabolite data). The Design Dataset is a template with a single
85 column called 'SampleID' that contains the names of the samples, exactly matching
86 the sample column headers in the input Wide Dataset. This Design Dataset can be modified by
87 the user to include metadata columns. The tool also creates a separate Annotation
88 Dataset containing the unique FeatureIDs (user-specified or generated by the tool)
89 and any non-sample descriptor columns that were present in the input wide dataset
90 (such as m/z ratio, retention time, compound name, etc.). Finally, the tool creates
91 a 'clean' Wide Dataset containing only samples in columns and features in rows.
92
93 --------------------------------------------------------------------------------
94
95 **INPUT**
96
97 **Example - Wide Format Input Dataset**
98
99 +---------+-----------+---------+---------+-----+
100 | rowID   | m/z ratio | sample1 | sample2 | ... |
101 +=========+===========+=========+=========+=====+
102 | 1       | 8.845     | 20      | 10      | ... |
103 +---------+-----------+---------+---------+-----+
104 | 2       | 0.258     | 22      | 30      | ... |
105 +---------+-----------+---------+---------+-----+
106 | 3       | 10.54     | 27      | 2       | ... |
107 +---------+-----------+---------+---------+-----+
108 | 4       | 8.594     | 17      | 8       | ... |
109 +---------+-----------+---------+---------+-----+
110 | ...     | ...       | ...     | ...     | ... |
111 +---------+-----------+---------+---------+-----+
112
113 **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File.
114
115 **Unique FeatureID**
116
117 If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one.
118
119 **Prefix**
120
121 The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: If met is input then the unique FeatureID column will consist of met\_ followed by a number.
122
123 **Sample Columns**
124
125 Comma-separated, 1-based numbers of the columns in the Input Dataset that contain sample data (e.g. 2,3,4 with no spaces). All columns not specified as samples will be used to populate the Annotation File.
126
127 --------------------------------------------------------------------------------
128
129 **OUTPUT**
130
131
132 **A Wide Dataset containing the FeatureID column and all columns selected as samples**
133
134 +------------+---------+---------+---------+-----+
135 | FeatureID  | sample1 | sample2 | sample3 | ... |
136 +============+=========+=========+=========+=====+
137 | met_1      | 10      | 20      | 10      | ... |
138 +------------+---------+---------+---------+-----+
139 | met_2      | 5       | 22      | 30      | ... |
140 +------------+---------+---------+---------+-----+
141 | met_3      | 30      | 27      | 2       | ... |
142 +------------+---------+---------+---------+-----+
143 | met_4      | 32      | 17      | 8       | ... |
144 +------------+---------+---------+---------+-----+
145 | ...        | ...     | ...     | ...     | ... |
146 +------------+---------+---------+---------+-----+
147
148 In the above example, *met* was input for Prefix
149
150
151 **A Design Dataset template containing a column called SampleID with the column headers from the input dataset that were chosen as samples**
152
153 +----------+---------+
154 | SampleID |         |
155 +==========+=========+
156 | sample1  |         |
157 +----------+---------+
158 | sample2  |         |
159 +----------+---------+
160 | sample3  |         |
161 +----------+---------+
162 | sample4  |         |
163 +----------+---------+
164 | ...      |         |
165 +----------+---------+
166
167
168 **An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns**
169
170 +-------------+------------+-----+
171 | FeatureID   | m/z ratio  | ... |
172 +=============+============+=====+
173 | FeatureID_1 | 8.845      | ... |
174 +-------------+------------+-----+
175 | FeatureID_2 | 0.258      | ... |
176 +-------------+------------+-----+
177 | FeatureID_3 | 10.54      | ... |
178 +-------------+------------+-----+
179 | FeatureID_4 | 8.594      | ... |
180 +-------------+------------+-----+
181 | ...         | ...        | ... |
182 +-------------+------------+-----+
183
184
185 ]]>
186 </help>
187 <citations> <!-- BibTeX entries for the tool suite and for Paintomics. Authors are separated with "and", as BibTeX requires. NOTE(review): the first entry still lists its year as "in press"; update once the published reference is confirmed. -->
188 <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
189 author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
190 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
191 journal = {BMC Bioinformatics},
192 year = {in press}
193 }</citation>
194 <citation type="bibtex">@article{garcia2010paintomics,
195 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
196 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
197 journal={Bioinformatics},
198 volume={27},
199 number={1},
200 pages={137--139},
201 year={2010},
202 publisher={Oxford University Press}
203 }</citation>
204 </citations>
205 </tool>