annotate galaxy_micropita/micropita.xml @ 3:8fb4630ab314 draft default tip

Uploaded
author sagun98
date Thu, 03 Jun 2021 17:07:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1 <tool id="micropita" name="Run" version="1.0.1">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
2 <code file="micropita_format_input_selector.py"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
3 <description>micropita</description>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
4 <command interpreter="python">micropita_prepare.py
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
5 --lastmeta $cls_x
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
6 -m $cond.method_sel
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
7 -n $selected_samples
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
8 --input $inp_data
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
9 --output $out_file1
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
10 --stratify_value $cls_s
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
11
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
12 #if $cond.method_sel == "features":
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
13 --feature_method $cond.feature_method
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
14 --targets $cond.cls_f
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
15 #end if
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
16 #if $cond.method_sel == "distinct" or $cond.method_sel == "discriminant" :
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
17 --label_value $cond.cls_L
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
18 #end if
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
19
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
20 </command>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
21 <inputs>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
22 <param format="micropita" name="inp_data" type="data" label="Input file"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
23
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
24 <param name="cls_x" type="select" label="Last metadata row (Use 'Label' for demo data)" multiple="False" size ="70" dynamic_options="get_cols(inp_data,'0')"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
25
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
26
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
27 <param name="cond" type="data_column" data_ref="inp_data" accept_default="true" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
28 <conditional name="cond" type="data_column" data_ref="inp_data" accept_default="true">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
29 <param name="method_sel" type="select" data_ref="inp_data" label="Select method">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
30 <option value="representative" selected="True">Representative</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
31 <option value="diverse">Diverse</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
32 <option value="extreme" >Extreme</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
33 <option value="features" >Features</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
34 <option value="distinct" >Distinct (Supervised)</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
35 <option value="discriminant" >Discriminant (Supervised)</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
36 </param>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
37 <when value="representative">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
38 </when>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
39 <when value="diverse">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
40 </when>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
41 <when value="extreme">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
42 </when>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
43 <when value="features">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
44 <param name="cls_f" type="select" label="Targeted feature(s)" multiple="True" size ="70" dynamic_options="get_cols_features(inp_data,'0',cls_x)"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
45 <param name="feature_method" type="select" format="text">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
46 <label>Selection type</label>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
47 <option value="rank">Rank</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
48 <option value="abundance">Abundance</option>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
49 </param>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
50 </when>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
51 <when value="distinct">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
52 <param name="cls_L" type="select" label="Label (Use 'Group' for demo data)" multiple="False" size ="70" dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
53 </when>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
54
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
55 <when value="discriminant">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
56 <param name="cls_L" type="select" label="Label (Use 'Group' for demo data)" multiple="False" size ="70" dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
57 </when>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
58
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
59 </conditional>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
60
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
61
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
62 <param name="cls_s" type="select" label="Stratify by (optional) (Use 'StratifyLabel' for demo data)" multiple="False" size ="70" dynamic_options="get_cols_add_line(inp_data,'0',cls_x)"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
63
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
64 <param name="selected_samples" type="integer" size="4" value="10" label="Number of samples to select"/>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
65
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
66
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
67
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
68 </inputs>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
69
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
70 <outputs>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
71 <data format="text" name="out_file1" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
72 </outputs>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
73 <requirements>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
74 <requirement type="set_environment">micropita_SCRIPT_PATH</requirement>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
75 </requirements>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
76 <tests>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
77 <test>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
78 <param name="inp_data" value="micropita_input" ftype="micropita" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
79 <param name="cls_x" value="5" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
80 <param name="cls_s" value="5" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
81 <param name="selected_samples" value="10" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
82 <param name="cond.method_sel" value="representative" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
<!-- assert_contents is nested inside <output> so the has_text check is applied to out_file1; the Galaxy tool schema defines assert_contents as a child of output, not of test. NOTE(review): confirm the separator in the expected text (space vs tab) matches the real tool output. -->
83 <output name="out_file1" file="micropita_output">
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
84 <assert_contents>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
85 <has_text text="representative Sample_22_R Sample_20_R" />
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
86 </assert_contents>
</output>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
87 </test>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
88 </tests>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
89 <help>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
90
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
91
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
92 microbiome: Picking Interesting Taxonomic Abundance
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
93 ---------------------------------------------------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
94
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
95
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
96
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
97 microPITA is a computational tool enabling sample selection in tiered studies. Tiered-study designs can allocate resources more efficiently, reducing study costs and maximizing the use of samples. From a survey study, selection of samples can be performed to target various microbial communities including:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
98
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
99 1. Samples with the most diverse community (maximum diversity);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
100 2. Samples dominated by specific microbes (targeted feature);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
101 3. Samples with microbial communities representative of the survey (representative dissimilarity);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
102 4. Samples with the most extreme microbial communities in the survey (most dissimilar);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
103 5. Given a phenotype (like disease state), samples at the border of phenotypes (discriminant) or samples typical of each phenotype (distinct).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
104
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
105 Additionally, methods can leverage clinical metadata by stratifying samples into groups in which samples are subsequently selected. This enables the use of microPITA in cohort studies.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
106
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
107
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
108 .. image:: https://github.com/biobakery/galaxy_micropita/wiki/HMPStool10PCoA.png
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
109 :height: 500
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
110 :width: 600
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
111
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
112 MicroPITA unsupervised method selection in the HMP 16S Gut Microbiome. Selection of 10 samples using targeted feature targeting *Bacteroides* (blue), maximum diversity (orange), representative dissimilarity (purple), and most dissimilar (pink) using Principal Coordinates Analysis (PCoA) for ordination. Targeted feature selects samples dominated by *Bacteroides* (upper left) while maximum diversity selects more diverse samples away from *Bacteroides* dominant samples. Representative selection selects samples covering the range of samples in the PCoA plot focusing on the higher density central region while maximum dissimilarity selects samples at the periphery of the plot.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
113
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
114
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
115 Instructions to run:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
116 --------------------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
117
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
118 Before running microPITA, you must upload your data using Galaxy's **Get Data - Upload File**.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
119 Please make sure that you choose **File Format Micropita**.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
120 An example can be found at https://bytebucket.org/biobakery/micropita/wiki/micropita_sample_PCL.txt
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
121
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
122 Required inputs
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
123 ---------------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
124
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
125 microPITA requires an input PCL file of metadata and microbial community measurements. Although some defaults can be changed, microPITA expects a PCL file as an input file. A PCL file is a delimited text file, similar to an Excel spreadsheet, with the following characteristics.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
126
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
127 1. **Rows** represent metadata and features (bugs), **columns** represent samples.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
128 2. The **first row** by default should be the sample ids.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
129 3. Metadata rows should be next.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
130 4. Lastly, rows containing feature (bug) measurements (like abundance) should be after metadata rows.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
131 5. The **first column** should contain the ID describing the row. For metadata this may be, for example, "Age" for a row containing the age of the patients donating the samples. For measurements, this should be the feature name (bug name).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
132 6. The file is expected to be TAB delimited.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
133 7. If a consensus lineage or hierarchy of taxonomy is contained in the feature name, the default delimiter between clades is the pipe ("|").
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
134
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
135 **Note:** Mac users, please save the file as Windows-formatted text.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
136
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
137 .. image:: https://bytebucket.org/biobakery/galaxy_micropita/wiki/pcl_diagram.png
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
138 :height: 500
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
139 :width: 600
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
140
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
141 Outputs
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
142 -------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
143
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
144 The Run MicroPITA module will create one output text file. The output will consist of one line starting with a key word for the selection method and then followed by selected samples delimited by tabs. An example of 6 samples selected by the representative method:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
145
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
146 representative sample_1 sample_2 sample_3 sample_4 sample_5 sample_6
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
147
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
148
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
149
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
150
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
151 Run microPITA
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
152 -------------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
153
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
154 A brief description of the Run micropita module.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
155
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
156 **Input file:**
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
157 This should be populated by the Load microPITA module.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
158
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
159 **Last metadata row:**
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
160 The row on the input pcl file that is the last metadata. All microbial measurements should follow this row.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
161
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
162 **Select method:**
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
163 Select which method to use for sample selection. Selection methods include:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
164
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
165 1. Representative. Samples with microbial communities representative of the survey (representative dissimilarity);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
166 2. Diverse. Samples with the most diverse community (maximum diversity);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
167 3. Extreme. Samples with the most extreme microbial communities in the survey (most dissimilar);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
168 4. Features. Samples dominated by specific microbes (targeted feature);
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
169 5. Distinct. Given a phenotype (like disease state), samples typical of each phenotype (Distinct).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
170 6. Discriminant. Given a phenotype (like disease state), samples at the border of phenotypes (Discriminant).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
171
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
172 **Targeted feature(s):** (visible with Features method selection only)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
173 Select 1 or more features to target in sample selection.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
174
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
175 **Selection type:** (visible with Features method selection only)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
176 Rank or Abundance.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
177
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
178 1. Rank indicates selecting samples that have the highest rank of the Targeted feature(s); this tends to select samples in which these features dominate the sample.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
179 2. Abundance indicates selecting samples that have the highest average abundance of the Targeted feature(s); this selects samples where features are most abundant but not necessarily dominant in the community.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
180
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
181 **Label:** (visible with supervised method selection only)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
182 The row which contains the label used to classify the samples from supervised methods.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
183
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
184 **Stratify by (optional):**
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
185 The row which contains the groupings the samples will first be placed in before running the selection method on each group. If no grouping is selected, selection methods will be performed on the data set as a whole.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
186
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
187 **Number of samples to select:**
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
188 The number of samples to select. If samples are stratified, this is per stratification (or group). If supervised methods are used, this is the number of samples selected per classification group (as defined by the label).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
189
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
190 For more information please visit http://huttenhower.sph.harvard.edu/micropita
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
191
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
192
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
193 Acknowledgments
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
194 ---------------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
195 Special thanks to Eric Franzosa for developing the above PCL figure!
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
196
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
197 Citation and Contacts
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
198 ---------------------
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
199
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
200 For more information please visit http://huttenhower.sph.harvard.edu/micropita
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
201 When using MicroPITA please cite:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
202 Tickle T, Segata N, Waldron L, Weingart G, Huttenhower C. Two-stage microbial community experimental design. (Under review)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
203
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
204 Please feel free to contact us at ttickle@hsph.harvard.edu for any questions or comments!
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
205
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
206
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
207 </help>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
208 </tool>