Mercurial > repos > malex > gait_gm
annotate split_wide_dataset.xml @ 2:2c218a253d56 draft default tip
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author | malex |
---|---|
date | Thu, 29 Jul 2021 20:48:10 +0000 |
parents | ec9ee8edb84d |
children |
rev | line source |
---|---|
1 | 1 <tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@"> |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
2 <description>from an input wide dataset</description> |
1 | 3 <macros> |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
7 <stdio> |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
8 <exit_code range="1" level="fatal" description="Repeated Unique IDs"/> |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
9 </stdio> |
1 | 10 <command detect_errors="exit_code"><![CDATA[ |
11 split_wide_dataset.py | |
12 -i='$input' | |
13 #if $cond_UniqID.hasUniqID == "y": | |
14 -id='$cond_UniqID.uniqID' | |
15 #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y": | |
16 -p2='$cond_UniqID.only_numbers.prefix2' | |
17 #end if | |
18 #else: | |
19 -p='$cond_UniqID.prefix' | |
20 #end if | |
21 -s='$samples' | |
22 -w='$wide' | |
23 -d='$design' | |
24 -a='$annot' | |
25 ]]></command> | |
26 <inputs> | |
27 <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/> | |
28 <conditional name="cond_UniqID"> | |
29 <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?"> | |
30 <option value="y">Yes</option> | |
31 <option value="n">No</option> | |
32 </param> | |
33 <when value="y"> | |
34 <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs."/> | |
35 <conditional name="only_numbers"> | |
36 <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?"> | |
37 <option value="y">Yes</option> | |
38 <option value="n">No</option> | |
39 </param> | |
40 <when value="y"> | |
41 <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/> | |
42 </when> | |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
43 <when value="n" /> |
1 | 44 </conditional> |
45 </when> | |
46 <when value="n"> | |
47 <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you chose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/> | |
48 </when> | |
49 </conditional> | |
50 <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. "/> | |
51 </inputs> | |
52 <outputs> | |
53 <data format="tabular" name="wide" label="${tool.name} on ${on_string}: Wide Dataset"/> | |
54 <data format="tabular" name="design" label="${tool.name} on ${on_string}: Design Dataset"/> | |
55 <data format="tabular" name="annot" label="${tool.name} on ${on_string}: Annotation Dataset"/> | |
56 </outputs> | |
57 <tests> | |
58 <test> | |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
59 <param name="input" value="gene_input_dataset.tsv"/> |
1 | 60 <param name="prefix" value="Gene"/> |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
61 <param name="hasUniqID" value="n"/> |
1 | 62 <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/> |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
63 <output name="wide" file="gene_wide_dataset.tsv"/> |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
64 <output name="design" file="gene_design.tsv"/> |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
65 <output name="annot" file="gene_annotation.tsv"/> |
1 | 66 </test> |
67 <test> | |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
68 <param name="input" value="metabolite_input_dataset.tsv"/> |
1 | 69 <param name="prefix" value="Met"/> |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
70 <param name="hasUniqID" value="n"/> |
1 | 71 <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/> |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
72 <output name="wide" file="metabolite_wide_dataset.tsv"/> |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
73 <output name="design" file="metabolite_design.tsv"/> |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
74 <output name="annot" file="metabolite_annotation.tsv"/> |
1 | 75 </test> |
76 </tests> | |
77 <help><![CDATA[ | |
78 | |
79 **Tool Description** | |
80 | |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
81 This tool takes a single file containing both feature data (e.g. gene or metabolite expression values) and annotation |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
82 information (e.g. m/z ratio, compound name) and generates the following three files: |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
83 |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
84 (1) a wide dataset containing a unique row identifier and the expression values, |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
85 (2) a wide annotation file with the unique row identifier and any non-data descriptor columns, and |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
86 (3) a design file with a single column called ‘sampleID’ containing the names of the columns that hold the expression data. |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
87 |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
88 If the input dataset does not already contain a column with a unique identifier, the tool will create one. |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
89 The user can specify a prefix for the unique identifier column (e.g. 'met' for metabolite data). In cases where the input |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
90 dataset contains a numeric identifier, the tool will prepend a user-specified prefix or, if no prefix is specified, an underbar. |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
91 Since the user specifies which columns contain expression values, the resulting wide dataset contains only these data columns |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
92 and the unique row identifier column. Columns not specified as containing expression values are output into the annotation dataset. |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
93 The resulting design file template contains a single column called ‘sampleID’ that contains the names of the user-specified samples |
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
94 in the input data file. The design file can be modified by the user to include additional metadata columns. |
1 | 95 |
96 -------------------------------------------------------------------------------- | |
97 | |
98 **INPUT** | |
99 | |
100 **Example - Wide Format Input Dataset** | |
101 | |
102 +---------+-----------+---------+---------+-----+ | |
103 | rowID | m/z ratio | sample1 | sample2 | ... | | |
104 +=========+===========+=========+=========+=====+ | |
105 | 1 | 8.845 | 20 | 10 | ... | | |
106 +---------+-----------+---------+---------+-----+ | |
107 | 2 | 0.258 | 22 | 30 | ... | | |
108 +---------+-----------+---------+---------+-----+ | |
109 | 3 | 10.54 | 27 | 2 | ... | | |
110 +---------+-----------+---------+---------+-----+ | |
111 | 4 | 8.594 | 17 | 8 | ... | | |
112 +---------+-----------+---------+---------+-----+ | |
113 | ... | ... | ... | ... | ... | | |
114 +---------+-----------+---------+---------+-----+ | |
115 | |
2
2c218a253d56
"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents:
1
diff
changeset
|
116 **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File. |
1 | 117 |
118 **Unique FeatureID** | |
119 | |
120 If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one. | |
121 | |
122 **Prefix** | |
123 | |
124 The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: If met is input then the unique FeatureID column will consist of met\_ followed by a number. | |
125 | |
126 **Sample Columns** | |
127 | |
128 Numbers (1-based) of the columns in the Input Dataset that contain sample information, e.g. 2,3,4. All columns not specified as samples will be used to populate the Annotation File. | |
129 | |
130 -------------------------------------------------------------------------------- | |
131 | |
132 **OUTPUT** | |
133 | |
134 | |
135 **A Wide Dataset containing the FeatureID column and all columns selected as samples** | |
136 | |
137 +------------+---------+---------+---------+-----+ | |
138 | FeatureID | sample1 | sample2 | sample3 | ... | | |
139 +============+=========+=========+=========+=====+ | |
140 | met_1 | 10 | 20 | 10 | ... | | |
141 +------------+---------+---------+---------+-----+ | |
142 | met_2 | 5 | 22 | 30 | ... | | |
143 +------------+---------+---------+---------+-----+ | |
144 | met_3 | 30 | 27 | 2 | ... | | |
145 +------------+---------+---------+---------+-----+ | |
146 | met_4 | 32 | 17 | 8 | ... | | |
147 +------------+---------+---------+---------+-----+ | |
148 | ... | ... | ... | ... | ... | | |
149 +------------+---------+---------+---------+-----+ | |
150 | |
151 In the above example, *met* was input for Prefix | |
152 | |
153 | |
154 **A Design Dataset template containing a column called sampleID with the column headers from the input dataset that were chosen as samples** | |
155 | |
156 +----------+---------+ | |
157 | sampleID | | |
158 +==========+=========+ | |
159 | sample1 | | | |
160 +----------+---------+ | |
161 | sample2 | | | |
162 +----------+---------+ | |
163 | sample3 | | | |
164 +----------+---------+ | |
165 | sample4 | | | |
166 +----------+---------+ | |
167 | ... | | | |
168 +----------+---------+ | |
169 | |
170 | |
171 **An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns** | |
172 | |
173 +-------------+------------+-----+ | |
174 | FeatureID | m/z ratio | ... | | |
175 +=============+============+=====+ | |
176 | FeatureID_1 | 8.845 | ... | | |
177 +-------------+------------+-----+ | |
178 | FeatureID_2 | 0.258 | ... | | |
179 +-------------+------------+-----+ | |
180 | FeatureID_3 | 10.54 | ... | | |
181 +-------------+------------+-----+ | |
182 | FeatureID_4 | 8.594 | ... | | |
183 +-------------+------------+-----+ | |
184 | ... | ... | ... | | |
185 +-------------+------------+-----+ | |
186 | |
187 | |
188 ]]> | |
189 </help> | |
190 <citations> | |
191 <citation type="bibtex">@ARTICLE{Kirpich17secimtools, | |
192 author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre}, | |
193 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools}, | |
194 journal = {BMC Bioinformatics}, | |
195 year = {in press} | |
196 }</citation> | |
197 <citation type="bibtex">@article{garcia2010paintomics, | |
198 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data}, | |
199 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, | |
200 journal={Bioinformatics}, | |
201 volume={27}, | |
202 number={1}, | |
203 pages={137--139}, | |
204 year={2010}, | |
205 publisher={Oxford University Press} | |
206 }</citation> | |
207 </citations> | |
208 </tool> |