annotate maaslin.xml @ 0:e0b5980139d9

maaslin
author george-weingart
date Tue, 13 May 2014 22:00:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1 <tool id="maaslin_run" name="MaAsLin" version="1.0.1">
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
2 <code file="maaslin_format_input_selector.py"/>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
3 <description></description>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
4 <command interpreter="python">maaslin_wrapper.py
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
5 --lastmeta $cls_x
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
6 --input $inp_data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
7 --output $out_file1
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
8 --alpha $alpha
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
9 --min_abd $min_abd
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
10 --min_samp $min_samp
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
11 --zip_file $zip_file
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
12 --tool_option1 $tool_option1
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
13 </command>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
14
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
15 <inputs>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
16 <param format="maaslin" name="inp_data" type="data" label="pcl file of metadata and microbial community measurements: Upload using Get Data-Upload file - Use File-Format = maaslin - Sample file below"/>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
17 <param name="cls_x" type="select" label="Last metadata row (Select 'Weight' for demo data set)" multiple="False" size ="70" dynamic_options="get_cols(inp_data,'0')"/>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
18 <param name="alpha" type="float" size="8" value="0.05" label="Maximum false discovery rate (significance threshold)"/>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
19 <param name="min_abd" type="float" size="8" value="0.0001" label="Minimum for feature relative abundance filtering"/>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
20 <param name="min_samp" type="float" size="8" value="0.01" label="Minimum for feature prevalence filtering"/>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
21
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
22 <param name="tool_option1" type="select" label="Type of output">
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
23 <option value="1">Single File: Summary</option>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
24 <option value="2">Two Files: Complete zipped results + Summary</option>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
25 </param>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
26 </inputs>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
27 <outputs>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
28 <data format="tabular" name="out_file1" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
29 <data name="zip_file" format="zip">
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
30 <filter>tool_option1 == "2"</filter>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
31 </data>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
32 </outputs>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
33 <requirements>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
34 <requirement type="set_environment">maaslin_SCRIPT_PATH</requirement>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
35 </requirements>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
36 <tests>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
37 <test>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
38 <param name="inp_data" value="maaslin_input" ftype="maaslin" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
39 <param name="cls_x" value="9" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
40 <param name="alpha" value="0.05" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
41 <param name="min_abd" value="0.0001" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
42 <param name="min_samp" value="0.01" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
43 <param name="tool_option1" value="1" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
44 <output name="out_file1" file="maaslin_output" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
45 <assert_contents>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
46 <has_text text="Variable Feature Value Coefficient N N.not.0 P.value Q.value" />
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
47 </assert_contents>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
48 </test>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
49 </tests>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
50 <help>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
51
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
52 Feedback? Not working? Please contact us at Maaslin_google_group_ .
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
53
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
54
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
55 MaAsLin: Multivariate Analysis by Linear Models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
56 -----------------------------------------------
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
57
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
58 MaAsLin is a multivariate statistical framework that finds associations between clinical metadata and microbial community abundance or function. The clinical metadata can be of any type: continuous (for example age and weight), boolean (sex, stool/biopsy), or discrete/factor (cohort groupings and phenotypes). MaAsLin is best used in the case when you are associating many metadata with microbial measurements. When this is the case each metadatum can be a different type. For example, you could include age, weight, sex, cohort and phenotype in the same input file to be analyzed in the same MaAsLin run. The microbial measurements are expected to be normalized before using MaAsLin and so are proportional data ranging from 0 to 1.0.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
59
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
60 The results of a MaAsLin run are the association of a specific microbial community member with metadata. These associations are without the influence of the other metadata in the study. There are certain factors known that can influence the microbiome (for example diet, age, geography, fecal or biopsy sample origin). MaAsLin allows one to detect the effect of a metadata, possibly a phenotype, deconfounding the effects of diet, age, sample origin or any other metadata captured in the study!
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
61
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
62 .. image:: https://bytebucket.org/biobakery/galaxy_maaslin/wiki/Figure1-Overview.png
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
63 :height: 500
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
64 :width: 600
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
65
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
66
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
67 *Maaslin Analysis Overview* MaAsLin performs boosted, additive general linear models between one group of data (metadata/the predictors) and another group (in our case microbial abundance/the response). Given that metagenomic data is sparse, the boosting is used to select metadata that show some potential to be associated with microbial abundances. Boosting of metadata and selection of a model occurs per OTU. The metadata selected by boosting is then used in a general linear model using metadata as predictors and OTU arcsin-square root transformed abundance as the response.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
68
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
69
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
70
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
71 For more information on the technical aspects of this algorithm please see the methodological evaluation of MaAsLin that compared it to multivariate and univariate analyses. Please check back for the paper citation.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
72
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
73 Process:
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
74 --------
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
75 The first step consists of uploading your data using Galaxy's **Get Data - Upload File**
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
76
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
77 A sample file is located at: https://bytebucket.org/biobakery/maaslin/wiki/maaslin_demo_pcl.txt
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
78
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
79
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
80 **Important**
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
81
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
82 Please make sure to choose **File Format: maaslin**
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
83
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
84 Required inputs
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
85 ---------------
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
86
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
87 MaAsLin requires an input pcl file of metadata and microbial community measurements. MaAsLin expects a PCL file as an input file. A PCL file is a text delimited file similar to an excel spread sheet with the following characteristics.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
88
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
89 1. **Rows** represent metadata and features (bugs), **columns** represent samples
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
90 2. The **first row** by default should be the sample ids.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
91 3. Metadata rows should be next.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
92 4. Lastly, rows containing features (bugs) measurements (like abundance) should be after metadata rows.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
93 5. The **first column** should contain the ID describing the row. For metadata this may be, for example, ''Age'' for a row containing the age of the patients donating the samples. For measurements, this should be the feature name (bug name).
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
94 6. The file is expected to be TAB delimited.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
95
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
96
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
97
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
98
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
99
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
100
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
101 Description of parameters
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
102 -------------------------
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
103 **Input file** Select a loaded data file to use in analysis.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
104
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
105 **Last metadata row** Metadata and microbial measurements should be rows of the pcl file. Metadata should all come before microbial measurements. This row is the last metadata row which is only followed by rows which are microbial measurements.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
106
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
107 **Maximum false discovery rate (Significance threshold)** Associations are found significant if their q-value is equal to or less than this threshold.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
108
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
109 **Minimum for feature relative abundance filtering** The minimum relative abundance allowed in the data. Values below this are removed and imputed as the median of the sample data.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
110
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
111 **Minimum for feature prevalence filtering** The minimum percentage of samples a feature can have abundance in before being removed.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
112
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
113 **Type of Output** Select one of the two options for output (summary or detailed results).
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
114
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
115 Outputs
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
116 -------
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
117
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
118 The Run MaAsLin module will create either A) a summary text file of plotted significant associations or B) a compressed directory of associations (significant and not significant).
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
119
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
120 A. Any association that had a q-value less than or equal to the significance threshold will be included in a tab-delimited file.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
121
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
122 B. The following files will be generated per MaAsLin run. In the following listing the term projectname refers to what you named your pcl file without the extension.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
123
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
124 **Analysis** (These files are useful for analysis):
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
125
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
126 **projectname-metadata.txt** Each metadata will have a file of associations. Any associations indicated to be performed after initial boosting are recorded here. Included are the information from the final general linear model (performed after the boosting) and the FDR corrected p-value (q-value). Can be opened as a text file or spreadsheet.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
127
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
128 **projectname-metadata.pdf** Any association that had a q-value less than or equal to the significance threshold will be plotted here. If this file does not exist, the projectname-metadata.txt should not have an entry that is less than or equal to the threshold. Factor and boolean data are plotted as notched box plots; continuous data are plotted as a scatter plot with a line of best fit.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
129
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
130 .. image:: https://bytebucket.org/biobakery/galaxy_maaslin/wiki/Maaslin_Output.png
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
131 :height: 500
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
132 :width: 600
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
133
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
134
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
135
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
136 *Example of the projectname-metadata.pdf file* Significant associations are combined in files of associations per metadata. Factor and boolean data are plotted as notched box plots; continuous data are plotted as a scatter plot with a line of best fit. Plots show raw data, header data show information from the reduced model.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
137
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
138 **projectname_Summary.txt** All entries in the projectname-metadata.pdf are collected together here. Can be opened as a text file or spreadsheet.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
139
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
140 **Troubleshooting** (These files are typically not used for analysis but are there for documenting the process and troubleshooting):
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
141
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
142 **projectname.txt** Contains the detail for the statistical engine. Is useful for detailed troubleshooting.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
143
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
144 **data.tsv** The data matrix that was read in (transposed). Useful for making sure the correct data was read in.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
145
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
146 **data.read.config** Can be used to read in the data.tsv .
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
147
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
148 **metadata.tsv** The metadata that was read in (transposed). Useful for making sure the correct metadata was read in.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
149
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
150 **metadata.read.config** Can be used to read in the metadata.tsv .
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
151
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
152 **read_merged.tsv** The data and metadata merged (transposed). Useful for making sure the merging occurred correctly.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
153
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
154 **read_merged.read.config** Can be used to read in the read_merged.tsv .
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
155
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
156 **read_cleaned.tsv** The data read in, merged, and then cleaned. After this process the data is written to this file for reference if needed.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
157
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
158 **read_cleaned.read.config** Can be used to read in read_cleaned.tsv .
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
159
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
160 **ProcessQC.txt** Contains quality control for the MaAsLin analysis. This includes information on the magnitude of outlier removal.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
161
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
162 Contacts
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
163 --------
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
164
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
165 Please feel free to contact us at ttickle@hsph.harvard.edu for any questions or comments!
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
166
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
167 .. _Maaslin_google_group: https://groups.google.com/d/forum/maaslin-users
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
168
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
169 </help>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
170 </tool>