lefse: format_input.xml annotate

author	george-weingart
date	Tue, 13 May 2014 21:57:00 -0400
parents
children

rev	line source
0 e7cd19afda2e Lefse george-weingart parents: diff changeset	1 <tool id="LEfSe_for" name="A) Format Data for LEfSe" version="1.0">
e7cd19afda2e Lefse george-weingart parents: diff changeset	2 <code file="format_input_selector.py"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	3 <description></description>
e7cd19afda2e Lefse george-weingart parents: diff changeset	4 <!-- Commented-out variant of the command, kept for reference (parameters referenced without the "cond." prefix, -o fixed at 1000000.0); the active command below passes the input dataset, the output dataset, and the -f/-c/-s/-u/-o options taken from the cond.* and norm parameters: <command interpreter="python">./format_input.py $inp_data $formatted_input -f $feat_dir -c $cls_n -s $subcls_n -u $subj_n -o 1000000.0 </command> -->
e7cd19afda2e Lefse george-weingart parents: diff changeset	5 <command interpreter="python">format_input.py $inp_data $formatted_input -f $cond.feat_dir -c $cond.cls_n -s $cond.subcls_n -u $cond.subj_n -o $norm </command>
e7cd19afda2e Lefse george-weingart parents: diff changeset	6 <inputs>
e7cd19afda2e Lefse george-weingart parents: diff changeset	7 <page>
e7cd19afda2e Lefse george-weingart parents: diff changeset	8 <param name="inp_data" type="data" format="tabular" label="Upload a tabular file of relative abundances and class labels (possibly also subclass and subjects labels) for LEfSe - See samples below - Please use Galaxy Get-Data/Upload-File. Use File-Type = Tabular" help=""/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	9 <param name="cond" type="data_column" data_ref="inp_data" accept_default="true"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	10 <!-- NOTE(review): the data_column param above and the conditional below are both named "cond"; confirm which one Galaxy keeps. -->
e7cd19afda2e Lefse george-weingart parents: diff changeset	11 <conditional name="cond" type="data_column" data_ref="inp_data" accept_default="true">
e7cd19afda2e Lefse george-weingart parents: diff changeset	12 <param name="feat_dir" type="select" data_ref="inp_data" help="" label="Select whether the vectors (features and meta-data information) are listed in rows or columns">
e7cd19afda2e Lefse george-weingart parents: diff changeset	13 <option value="r" selected="True">Rows</option>
e7cd19afda2e Lefse george-weingart parents: diff changeset	14 <option value="c">Columns</option>
e7cd19afda2e Lefse george-weingart parents: diff changeset	15 </param>
e7cd19afda2e Lefse george-weingart parents: diff changeset	16 <!-- Each branch offers the same three selectors (class, subclass, subject); only the labels and the 'r'/'c' argument passed to get_cols differ. -->
e7cd19afda2e Lefse george-weingart parents: diff changeset	17 <when value="r">
e7cd19afda2e Lefse george-weingart parents: diff changeset	18 <param name="cls_n" type="select" size="70" label="Select which row to use as class" dynamic_options="get_cols(inp_data,'r','cl')"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	19 <param name="subcls_n" type="select" label="Select which row to use as subclass" dynamic_options="get_cols(inp_data,'r','subclass')"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	20 <param name="subj_n" type="select" label="Select which row to use as subject" dynamic_options="get_cols(inp_data,'r','subject')"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	21 </when>
e7cd19afda2e Lefse george-weingart parents: diff changeset	22 <when value="c">
e7cd19afda2e Lefse george-weingart parents: diff changeset	23 <param name="cls_n" type="select" label="Select which column to use as class" dynamic_options="get_cols(inp_data,'c','cl')"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	24 <param name="subcls_n" type="select" label="Select which column to use as subclass" dynamic_options="get_cols(inp_data,'c','subclass')"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	25 <param name="subj_n" type="select" label="Select which column to use as subject" dynamic_options="get_cols(inp_data,'c','subject')"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	26 </when>
e7cd19afda2e Lefse george-weingart parents: diff changeset	27
e7cd19afda2e Lefse george-weingart parents: diff changeset	28 </conditional>
e7cd19afda2e Lefse george-weingart parents: diff changeset	29 <!-- The selected value is passed to the -o option of format_input.py: 1000000.0 for "Yes", -1 for "No". -->
e7cd19afda2e Lefse george-weingart parents: diff changeset	30 <param name="norm" type="select" help="" label="Per-sample normalization of the sum of the values to 1M (recommended when very low values are present)">
e7cd19afda2e Lefse george-weingart parents: diff changeset	31 <option value="1000000.0" selected="True">Yes</option>
e7cd19afda2e Lefse george-weingart parents: diff changeset	32 <option value="-1">No</option>
e7cd19afda2e Lefse george-weingart parents: diff changeset	33 </param>
e7cd19afda2e Lefse george-weingart parents: diff changeset	34
e7cd19afda2e Lefse george-weingart parents: diff changeset	35 <!-- Disabled data_row selector, kept for reference: <param name="row" label="on row" type="data_row" data_ref="inp_data" accept_default="true" /> -->
e7cd19afda2e Lefse george-weingart parents: diff changeset	36 </page>
e7cd19afda2e Lefse george-weingart parents: diff changeset	37 </inputs>
e7cd19afda2e Lefse george-weingart parents: diff changeset	38 <outputs>
e7cd19afda2e Lefse george-weingart parents: diff changeset	39 <data name="formatted_input" format="lefse"/>
e7cd19afda2e Lefse george-weingart parents: diff changeset	40 </outputs>
e7cd19afda2e Lefse george-weingart parents: diff changeset	41 <!-- Functional test: feat_dir=r, cls_n=1, subcls_n=-1, subj_n=-1, normalization on (presumably "first row as class, no subclass, no subject", as the help text describes for the sample dataset; confirm against format_input_selector.py). Parameter names and the norm value must match the declarations in the inputs section. -->
e7cd19afda2e Lefse george-weingart parents: diff changeset	42 <tests>
e7cd19afda2e Lefse george-weingart parents: diff changeset	43 <test>
e7cd19afda2e Lefse george-weingart parents: diff changeset	44 <param name="inp_data" value="lefse_input" ftype="tabular" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	45 <param name="cond.feat_dir" value="r" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	46 <param name="cond.cls_n" value="1" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	47 <param name="cond.subcls_n" value="-1" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	48 <param name="cond.subj_n" value="-1" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	49 <param name="norm" value="1000000.0" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	50 <output name="formatted_input" file="lefse_output_a" />
e7cd19afda2e Lefse george-weingart parents: diff changeset	51 </test>
e7cd19afda2e Lefse george-weingart parents: diff changeset	52 </tests>
e7cd19afda2e Lefse george-weingart parents: diff changeset	53
e7cd19afda2e Lefse george-weingart parents: diff changeset	54
e7cd19afda2e Lefse george-weingart parents: diff changeset	55
e7cd19afda2e Lefse george-weingart parents: diff changeset	56
e7cd19afda2e Lefse george-weingart parents: diff changeset	57 <help>
e7cd19afda2e Lefse george-weingart parents: diff changeset	58
e7cd19afda2e Lefse george-weingart parents: diff changeset	59
e7cd19afda2e Lefse george-weingart parents: diff changeset	60 **What it does**
e7cd19afda2e Lefse george-weingart parents: diff changeset	61
e7cd19afda2e Lefse george-weingart parents: diff changeset	62 LDA Effect Size (LEfSe) `(Segata et. al 2010)`_ is an algorithm for high-dimensional biomarker discovery and
e7cd19afda2e Lefse george-weingart parents: diff changeset	63 explanation that identifies genomic features (genes, pathways, or taxa) characterizing
e7cd19afda2e Lefse george-weingart parents: diff changeset	64 the differences between two or more biological conditions (or classes, see figure below). It
e7cd19afda2e Lefse george-weingart parents: diff changeset	65 emphasizes both statistical significance and biological relevance, allowing
e7cd19afda2e Lefse george-weingart parents: diff changeset	66 researchers to identify differentially abundant features that are also consistent with
e7cd19afda2e Lefse george-weingart parents: diff changeset	67 biologically meaningful categories (subclasses). LEfSe first robustly
e7cd19afda2e Lefse george-weingart parents: diff changeset	68 identifies features that are statistically different among biological classes. It then
e7cd19afda2e Lefse george-weingart parents: diff changeset	69 performs additional tests to assess whether these differences are consistent with
e7cd19afda2e Lefse george-weingart parents: diff changeset	70 respect to expected biological behavior.
e7cd19afda2e Lefse george-weingart parents: diff changeset	71
e7cd19afda2e Lefse george-weingart parents: diff changeset	72 Specifically, we first use the non-parametric factorial
e7cd19afda2e Lefse george-weingart parents: diff changeset	73 Kruskal-Wallis (KW) sum-rank test to detect features with
e7cd19afda2e Lefse george-weingart parents: diff changeset	74 significant differential abundance with respect to the class of interest; biological
e7cd19afda2e Lefse george-weingart parents: diff changeset	75 significance is subsequently investigated using a set of pairwise tests among
e7cd19afda2e Lefse george-weingart parents: diff changeset	76 subclasses using the (unpaired) Wilcoxon rank-sum test. As a last step, LEfSe uses
e7cd19afda2e Lefse george-weingart parents: diff changeset	77 Linear Discriminant Analysis to estimate the effect size of each differentially
e7cd19afda2e Lefse george-weingart parents: diff changeset	78 abundant feature and, if desired by the investigator, to perform dimension reduction.
e7cd19afda2e Lefse george-weingart parents: diff changeset	79
e7cd19afda2e Lefse george-weingart parents: diff changeset	80 LEfSe consists of six modules performing the following steps (see the figure below).
e7cd19afda2e Lefse george-weingart parents: diff changeset	81
e7cd19afda2e Lefse george-weingart parents: diff changeset	82 The first step consists of uploading your file by using Galaxy's "Get-Data / Upload-file"
e7cd19afda2e Lefse george-weingart parents: diff changeset	83
e7cd19afda2e Lefse george-weingart parents: diff changeset	84
e7cd19afda2e Lefse george-weingart parents: diff changeset	85 The next steps are:
e7cd19afda2e Lefse george-weingart parents: diff changeset	86
e7cd19afda2e Lefse george-weingart parents: diff changeset	87 + A) Format Data for LEfSe: selects the structure of the problem (classes, subclasses, subjects) and formats the tabular abundance data for the B module
e7cd19afda2e Lefse george-weingart parents: diff changeset	88 + B) LDA Effect Size (LEfSe): performs the analysis using the data formatted with module A and provides input for the visualization modules (C, D, E, F)
e7cd19afda2e Lefse george-weingart parents: diff changeset	89 + C) Plot LEfSe Results: graphically reports the discovered biomarkers (output of B) with their effect sizes
e7cd19afda2e Lefse george-weingart parents: diff changeset	90 + D) Plot Cladogram: graphically represents the discovered biomarkers (output of B) in a taxonomic tree specified by the hierarchical feature names (not available for non-hierarchical features)
e7cd19afda2e Lefse george-weingart parents: diff changeset	91 + E) Plot One Feature: plots the row values of a feature (biomarker or not) as an abundance histogram with classes and subclasses structure (only one feature at a time)
e7cd19afda2e Lefse george-weingart parents: diff changeset	92 + F) Plot Differential Features: plots the row values of all features (biomarkers or not) as abundance histograms with classes and subclasses structure and provides a zip archive of the figures
e7cd19afda2e Lefse george-weingart parents: diff changeset	93
e7cd19afda2e Lefse george-weingart parents: diff changeset	94 .. image:: https://bytebucket.org/biobakery/galaxy_lefse/wiki/lefse_ove.png
e7cd19afda2e Lefse george-weingart parents: diff changeset	95
e7cd19afda2e Lefse george-weingart parents: diff changeset	96
e7cd19afda2e Lefse george-weingart parents: diff changeset	97 ------
e7cd19afda2e Lefse george-weingart parents: diff changeset	98
e7cd19afda2e Lefse george-weingart parents: diff changeset	99
e7cd19afda2e Lefse george-weingart parents: diff changeset	100 Input file format
e7cd19afda2e Lefse george-weingart parents: diff changeset	101
e7cd19afda2e Lefse george-weingart parents: diff changeset	102 The text tab-delimited input file consists of a list of numerical features, the class vector and optionally the subclass and subject vectors. The features can be read counts directly or abundance floating-point values more generally, and the first field is the name of the feature. Class, subclass and subject vectors have a name (the first field) and a list of non-numerical strings.
e7cd19afda2e Lefse george-weingart parents: diff changeset	103
e7cd19afda2e Lefse george-weingart parents: diff changeset	104 Although both column and row feature organization is accepted, given the high-dimensional nature of metagenomic data, the listing of the features in rows is preferred. A partial example of an input file follows (all values are separated by single-tab)::
e7cd19afda2e Lefse george-weingart parents: diff changeset	105
e7cd19afda2e Lefse george-weingart parents: diff changeset	106 bodysite mucosal mucosal mucosal mucosal mucosal non_mucosal non_mucosal non_mucosal non_mucosal non_mucosal
e7cd19afda2e Lefse george-weingart parents: diff changeset	107 subsite oral gut oral oral gut skin nasal skin ear nasal
e7cd19afda2e Lefse george-weingart parents: diff changeset	108 id 1023 1023 1672 1876 1672 159005010 1023 1023 1023 1672
e7cd19afda2e Lefse george-weingart parents: diff changeset	109 Bacteria 0.99999 0.99999 0.999993 0.999989 0.999997 0.999927 0.999977 0.999987 0.999997 0.999993
e7cd19afda2e Lefse george-weingart parents: diff changeset	110 Bacteria\|Actinobacteria 0.311037 0.000864363 0.00446132 0.0312045 0.000773642 0.359354 0.761108 0.603002 0.95913 0.753688
e7cd19afda2e Lefse george-weingart parents: diff changeset	111 Bacteria\|Bacteroidetes 0.0689602 0.804293 0.00983343 0.0303561 0.859838 0.0195298 0.0212741 0.145729 0.0115617 0.0114511
e7cd19afda2e Lefse george-weingart parents: diff changeset	112 Bacteria\|Firmicutes 0.494223 0.173411 0.715345 0.813046 0.124552 0.177961 0.189178 0.188964 0.0226835 0.192665
e7cd19afda2e Lefse george-weingart parents: diff changeset	113 Bacteria\|Proteobacteria 0.0914284 0.0180378 0.265664 0.109549 0.00941215 0.430869 0.0225884 0.0532684 0.00512034 0.0365453
e7cd19afda2e Lefse george-weingart parents: diff changeset	114 Bacteria\|Firmicutes\|Clostridia 0.090041 0.170246 0.00483188 0.0465328 0.122702 0.0402301 0.0460614 0.135201 0.0115835 0.0537381
e7cd19afda2e Lefse george-weingart parents: diff changeset	115
e7cd19afda2e Lefse george-weingart parents: diff changeset	116 In this case one may want to use bodysite as class, subsite as subclass and id as subject. Notice that the features have a hierarchical structure specified using the character \\|.
e7cd19afda2e Lefse george-weingart parents: diff changeset	117
e7cd19afda2e Lefse george-weingart parents: diff changeset	118
e7cd19afda2e Lefse george-weingart parents: diff changeset	119 Input file sample
e7cd19afda2e Lefse george-weingart parents: diff changeset	120
e7cd19afda2e Lefse george-weingart parents: diff changeset	121 You can try the LEfSe modules using the dataset available here_. You can upload the dataset using Galaxy's Get-Data / Upload File
e7cd19afda2e Lefse george-weingart parents: diff changeset	122
e7cd19afda2e Lefse george-weingart parents: diff changeset	123 This is a 16S dataset from `(Garrett et. al 2010)`_ and `(Veiga et. al 2010)`_ for studying the characteristics of the fecal microbiota in a mouse model of spontaneous colitis. The dataset contains 30 abundance profiles (obtained by processing the 16S reads with RDP) belonging to 10 rag2 (control) and 20 truc (case) mice. The metadata consists of class information only, as we don't have subject or subclass information. The same dataset is used to show the graphical results in the module descriptions.
e7cd19afda2e Lefse george-weingart parents: diff changeset	124
e7cd19afda2e Lefse george-weingart parents: diff changeset	125
e7cd19afda2e Lefse george-weingart parents: diff changeset	126
e7cd19afda2e Lefse george-weingart parents: diff changeset	127 ------
e7cd19afda2e Lefse george-weingart parents: diff changeset	128
e7cd19afda2e Lefse george-weingart parents: diff changeset	129 STEP A:
e7cd19afda2e Lefse george-weingart parents: diff changeset	130 -------
e7cd19afda2e Lefse george-weingart parents: diff changeset	131
e7cd19afda2e Lefse george-weingart parents: diff changeset	132
e7cd19afda2e Lefse george-weingart parents: diff changeset	133 What STEP A does
e7cd19afda2e Lefse george-weingart parents: diff changeset	134
e7cd19afda2e Lefse george-weingart parents: diff changeset	135 Preprocessing module for the biomarker discovery tool called LEfSe:
e7cd19afda2e Lefse george-weingart parents: diff changeset	136
e7cd19afda2e Lefse george-weingart parents: diff changeset	137 This module of LEfSe preprocesses metagenomic abundance data for the analyses to be carried out with the "Run LEfSe" module. This module is separated from the "Run LEfSe" because one may want to preprocess the data only once but run multiple analyses.
e7cd19afda2e Lefse george-weingart parents: diff changeset	138
e7cd19afda2e Lefse george-weingart parents: diff changeset	139 For an overview of LEfSe please refer to the "Introduction" module or to `(Segata et. al 2011)`_.
e7cd19afda2e Lefse george-weingart parents: diff changeset	140
e7cd19afda2e Lefse george-weingart parents: diff changeset	141 ------
e7cd19afda2e Lefse george-weingart parents: diff changeset	142
e7cd19afda2e Lefse george-weingart parents: diff changeset	143 Input format
e7cd19afda2e Lefse george-weingart parents: diff changeset	144
e7cd19afda2e Lefse george-weingart parents: diff changeset	145 The module accepts tabular data with the feature list in rows or columns.
e7cd19afda2e Lefse george-weingart parents: diff changeset	146
e7cd19afda2e Lefse george-weingart parents: diff changeset	147 ------
e7cd19afda2e Lefse george-weingart parents: diff changeset	148
e7cd19afda2e Lefse george-weingart parents: diff changeset	149 Output format
e7cd19afda2e Lefse george-weingart parents: diff changeset	150
e7cd19afda2e Lefse george-weingart parents: diff changeset	151 The module generates data readable by the "Run LEfSe" module only.
e7cd19afda2e Lefse george-weingart parents: diff changeset	152
e7cd19afda2e Lefse george-weingart parents: diff changeset	153 ------
e7cd19afda2e Lefse george-weingart parents: diff changeset	154
e7cd19afda2e Lefse george-weingart parents: diff changeset	155 Parameters
e7cd19afda2e Lefse george-weingart parents: diff changeset	156
e7cd19afda2e Lefse george-weingart parents: diff changeset	157 The class vector represents the labels of the main condition under investigation. The (optional) subclass vector denotes the internal groupings with biological meaning inside each class (subclasses of different classes with the same name are processed as different subclasses). The subject vector (optional) reports a third dimension denoting meta-data (subject id, sample type, ... ) which is independent from the class and subclass definition.
e7cd19afda2e Lefse george-weingart parents: diff changeset	158
e7cd19afda2e Lefse george-weingart parents: diff changeset	159 The labels can have a hierarchical organization (see example below) reflecting taxonomies (like NCBI or RDP microbial taxonomy, SEED subsystems or GO terms). The taxonomic levels are specified using the character \\|.
e7cd19afda2e Lefse george-weingart parents: diff changeset	160
e7cd19afda2e Lefse george-weingart parents: diff changeset	161 The per-sample normalization is usually applied for metagenomic data in which the relative abundances are taken into account.
e7cd19afda2e Lefse george-weingart parents: diff changeset	162
e7cd19afda2e Lefse george-weingart parents: diff changeset	163 ------
e7cd19afda2e Lefse george-weingart parents: diff changeset	164
e7cd19afda2e Lefse george-weingart parents: diff changeset	165 Example
e7cd19afda2e Lefse george-weingart parents: diff changeset	166
e7cd19afda2e Lefse george-weingart parents: diff changeset	167 Although both column and row feature organization is accepted, given the high-dimensional nature of metagenomic data, the listing of the features in rows is preferred. A partial example of an input file follows (all values are separated by single-tab)::
e7cd19afda2e Lefse george-weingart parents: diff changeset	168
e7cd19afda2e Lefse george-weingart parents: diff changeset	169 bodysite mucosal mucosal mucosal mucosal mucosal non_mucosal non_mucosal non_mucosal non_mucosal non_mucosal
e7cd19afda2e Lefse george-weingart parents: diff changeset	170 subsite oral gut oral oral gut skin nasal skin ear nasal
e7cd19afda2e Lefse george-weingart parents: diff changeset	171 id 1023 1023 1672 1876 1672 159005010 1023 1023 1023 1672
e7cd19afda2e Lefse george-weingart parents: diff changeset	172 Bacteria 0.99999 0.99999 0.999993 0.999989 0.999997 0.999927 0.999977 0.999987 0.999997 0.999993
e7cd19afda2e Lefse george-weingart parents: diff changeset	173 Bacteria\|Actinobacteria 0.311037 0.000864363 0.00446132 0.0312045 0.000773642 0.359354 0.761108 0.603002 0.95913 0.753688
e7cd19afda2e Lefse george-weingart parents: diff changeset	174 Bacteria\|Bacteroidetes 0.0689602 0.804293 0.00983343 0.0303561 0.859838 0.0195298 0.0212741 0.145729 0.0115617 0.0114511
e7cd19afda2e Lefse george-weingart parents: diff changeset	175 Bacteria\|Firmicutes 0.494223 0.173411 0.715345 0.813046 0.124552 0.177961 0.189178 0.188964 0.0226835 0.192665
e7cd19afda2e Lefse george-weingart parents: diff changeset	176 Bacteria\|Proteobacteria 0.0914284 0.0180378 0.265664 0.109549 0.00941215 0.430869 0.0225884 0.0532684 0.00512034 0.0365453
e7cd19afda2e Lefse george-weingart parents: diff changeset	177 Bacteria\|Firmicutes\|Clostridia 0.090041 0.170246 0.00483188 0.0465328 0.122702 0.0402301 0.0460614 0.135201 0.0115835 0.0537381
e7cd19afda2e Lefse george-weingart parents: diff changeset	178
e7cd19afda2e Lefse george-weingart parents: diff changeset	179 In this case one may want to use bodysite as class, subsite as subclass and id as subject. Notice that the features have a hierarchical structure specified using the character \\|.
e7cd19afda2e Lefse george-weingart parents: diff changeset	180
e7cd19afda2e Lefse george-weingart parents: diff changeset	181 Example with the "mouse model dataset"
e7cd19afda2e Lefse george-weingart parents: diff changeset	182
e7cd19afda2e Lefse george-weingart parents: diff changeset	183 You can try the LEfSe modules using the dataset available here_. This is a 16S dataset from `(Garrett et. al 2010)`_ and `(Veiga et. al 2010)`_ for studying the characteristics of the fecal microbiota in a mouse model of spontaneous colitis. The dataset contains 30 abundance profiles (obtained processing the 16S reads with RDP) belonging to 10 rag2 (control) and 20 truc (case) mice. The metadata consists of class information only, as we don't have subject or subclass information. The dataset contains the features organized in rows; you need to select the first row as class, whereas you have to select "no subclass" and "no subject" options.
e7cd19afda2e Lefse george-weingart parents: diff changeset	184
e7cd19afda2e Lefse george-weingart parents: diff changeset	185
e7cd19afda2e Lefse george-weingart parents: diff changeset	186 .. _here: http://www.huttenhower.org/webfm_send/73
e7cd19afda2e Lefse george-weingart parents: diff changeset	187 .. _(Segata et. al 2011): http://www.ncbi.nlm.nih.gov/pubmed/21702898
e7cd19afda2e Lefse george-weingart parents: diff changeset	188 .. _(Garrett et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20833380
e7cd19afda2e Lefse george-weingart parents: diff changeset	189 .. _(Veiga et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20921388
e7cd19afda2e Lefse george-weingart parents: diff changeset	190 .. _contact us: nsegata@hsph.harvard.edu
e7cd19afda2e Lefse george-weingart parents: diff changeset	191
e7cd19afda2e Lefse george-weingart parents: diff changeset	192
e7cd19afda2e Lefse george-weingart parents: diff changeset	193
e7cd19afda2e Lefse george-weingart parents: diff changeset	194
e7cd19afda2e Lefse george-weingart parents: diff changeset	195 How to Cite LEfSe
e7cd19afda2e Lefse george-weingart parents: diff changeset	196
e7cd19afda2e Lefse george-weingart parents: diff changeset	197 If you find LEfSe useful in your research, please cite our paper `(Segata et. al 2010)`_:
e7cd19afda2e Lefse george-weingart parents: diff changeset	198
e7cd19afda2e Lefse george-weingart parents: diff changeset	199 \| `Nicola Segata`_, Jacques Izard, Levi Waldron, Dirk Gevers, Larisa Miropolsky, Wendy Garrett, `Curtis Huttenhower`_.
e7cd19afda2e Lefse george-weingart parents: diff changeset	200 \| "`Metagenomic Biomarker Discovery and Explanation`_"
e7cd19afda2e Lefse george-weingart parents: diff changeset	201 \| Genome Biology, 2011 Jun 24;12(6):R60
e7cd19afda2e Lefse george-weingart parents: diff changeset	202
e7cd19afda2e Lefse george-weingart parents: diff changeset	203
e7cd19afda2e Lefse george-weingart parents: diff changeset	204 Please do not hesitate to `contact us`_ for any questions or comments.
e7cd19afda2e Lefse george-weingart parents: diff changeset	205
e7cd19afda2e Lefse george-weingart parents: diff changeset	206 .. _here: http://www.huttenhower.org/webfm_send/73
e7cd19afda2e Lefse george-weingart parents: diff changeset	207 .. _(Segata et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/21702898
e7cd19afda2e Lefse george-weingart parents: diff changeset	208 .. _(Garrett et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20833380
e7cd19afda2e Lefse george-weingart parents: diff changeset	209 .. _(Veiga et. al 2010): http://www.ncbi.nlm.nih.gov/pubmed/20921388
e7cd19afda2e Lefse george-weingart parents: diff changeset	210 .. _contact us: nsegata@hsph.harvard.edu
e7cd19afda2e Lefse george-weingart parents: diff changeset	211 .. _Nicola Segata: nsegata@hsph.harvard.edu
e7cd19afda2e Lefse george-weingart parents: diff changeset	212 .. _Curtis Huttenhower: chuttenh@hsph.harvard.edu
e7cd19afda2e Lefse george-weingart parents: diff changeset	213 .. _Metagenomic Biomarker Discovery and Explanation: http://genomebiology.com/2011/12/6/R60
e7cd19afda2e Lefse george-weingart parents: diff changeset	214
e7cd19afda2e Lefse george-weingart parents: diff changeset	215
e7cd19afda2e Lefse george-weingart parents: diff changeset	216
e7cd19afda2e Lefse george-weingart parents: diff changeset	217
e7cd19afda2e Lefse george-weingart parents: diff changeset	218 </help>
e7cd19afda2e Lefse george-weingart parents: diff changeset	219 </tool>

0

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

1 <tool id="LEfSe_for" name="A) Format Data for LEfSe" version="1.0">

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

2 <code file="format_input_selector.py"/>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

3 <description></description>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

4

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

5 <command interpreter="python">format_input.py $inp_data $formatted_input -f $cond.feat_dir -c $cond.cls_n -s $cond.subcls_n -u $cond.subj_n -o $norm </command>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

6 <inputs>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

7 <page>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

8 <param format="tabular" name="inp_data" type="data" label="Upload a tabular file of relative abundances and class labels (possibly also subclass and subjects labels) for LEfSe - See samples below - Please use Galaxy Get-Data/Upload-File. Use File-Type = Tabular" help=""/>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

9 <param name="cond" type="data_column" data_ref="inp_data" accept_default="true" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

10

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

11 <conditional name="cond" type="data_column" data_ref="inp_data" accept_default="true">

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

12 <param name="feat_dir" type="select" data_ref="inp_data" label="Select whether the vectors (features and meta-data information) are listed in rows or columns" help="">

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

13 <option value="r" selected='True'>Rows</option>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

14 <option value="c">Columns</option>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

15 </param>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

16

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

17 <when value="r">

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

18 <param name="cls_n" label="Select which row to use as class" size ="70" type='select' dynamic_options="get_cols(inp_data,'r','cl')" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

19 <param name="subcls_n" label="Select which row to use as subclass" type='select' dynamic_options="get_cols(inp_data,'r','subclass')" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

20 <param name="subj_n" label="Select which row to use as subject" type='select' dynamic_options="get_cols(inp_data,'r','subject')" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

21 </when>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

22 <when value="c">

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

23 <param name="cls_n" label="Select which column to use as class" type='select' dynamic_options="get_cols(inp_data,'c','cl')" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

24 <param name="subcls_n" label="Select which column to use as subclass" type='select' dynamic_options="get_cols(inp_data,'c','subclass')" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

25 <param name="subj_n" label="Select which column to use as subject" type='select' dynamic_options="get_cols(inp_data,'c','subject')" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

26 </when>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

27

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

28 </conditional>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

29

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

30 <param name="norm" type="select" label="Per-sample normalization of the sum of the values to 1M (recommended when very low values are present)" help="">

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

31 <option value="1000000.0" selected='True'>Yes</option>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

32 <option value="-1">No</option>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

33 </param>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

34

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

35

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

36 </page>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

37 </inputs>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

38 <outputs>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

39 <data format="lefse" name="formatted_input" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

40 </outputs>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

41

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

42 <tests>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

43 <test>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

44 <param name="inp_data" value="lefse_input" ftype="tabular" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

45 <param name="cond.feat_dir" value="r" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

46 <param name="cond.cls_n" value="1" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

47 <param name="cond.subcls" value="-1" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

48 <param name="cond.subj" value="-1" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

49 <param name="norm" value="1000000" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

50 <output name="formatted_input" file="lefse_output_a" />

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

51 </test>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

52 </tests>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

53

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

54

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

55

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

56

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

57 <help>

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

58

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

59

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

60 **What it does**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

61

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

62 LDA Effect Size (LEfSe) `(Segata et. al 2010)`_ is an algorithm for high-dimensional biomarker discovery and

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

63 explanation that identifies genomic features (genes, pathways, or taxa) characterizing

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

64 the differences between two or more biological conditions (or classes, see figure below). It

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

65 emphasizes both statistical significance and biological relevance, allowing

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

66 researchers to identify differentially abundant features that are also consistent with

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

67 biologically meaningful categories (subclasses). LEfSe first robustly

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

68 identifies features that are statistically different among biological classes. It then

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

69 performs additional tests to assess whether these differences are consistent with

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

70 respect to expected biological behavior.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

71

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

72 Specifically, we first use the non-parametric factorial

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

73 Kruskal-Wallis (KW) sum-rank test to detect features with

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

74 significant differential abundance with respect to the class of interest; biological

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

75 significance is subsequently investigated using a set of pairwise tests among

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

76 subclasses using the (unpaired) Wilcoxon rank-sum test. As a last step, LEfSe uses

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

77 Linear Discriminant Analysis to estimate the effect size of each differentially

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

78 abundant feature and, if desired by the investigator, to perform dimension reduction.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

79

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

80 LEfSe consists of six modules performing the following steps (see the figure below).

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

81

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

82 The first step consists of **uploading your file** by using Galaxy's "Get-Data / Upload-file".

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

83

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

84

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

85 The next steps are:

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

86

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

87 + **A) Format Data for LEfSe**: selects the structure of the problem (classes, subclasses, subjects) and formats the tabular abundance data for the B module

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

88 + **B) LDA Effect Size (LEfSe)**: performs the analysis using the data formatted with module A and provides input for the visualization modules (C, D, E, F)

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

89 + **C) Plot LEfSe Results**: graphically reports the discovered biomarkers (output of B) with their effect sizes

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

90 + **D) Plot Cladogram**: graphically represents the discovered biomarkers (output of B) in a taxonomic tree specified by the hierarchical feature names (not available for non-hierarchical features)

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

91 + **E) Plot One Feature**: plots the row values of a feature (biomarker or not) as an abundance histogram with classes and subclasses structure (only one feature at a time)

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

92 + **F) Plot Differential Features**: plots the row values of all features (biomarkers or not) as abundance histograms with classes and subclasses structure and provides a zip archive of the figures

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

93

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

94 .. image:: https://bytebucket.org/biobakery/galaxy_lefse/wiki/lefse_ove.png

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

95

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

96

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

97 ------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

98

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

99

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

100 **Input file format**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

101

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

102 The text tab-delimited input file consists of a list of numerical features, the class vector and optionally the subclass and subject vectors. The features can be read counts directly or abundance floating-point values more generally, and the first field is the name of the feature. Class, subclass and subject vectors have a name (the first field) and a list of non-numerical strings.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

103

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

104 Although both column and row feature organization is accepted, given the high-dimensional nature of metagenomic data, the listing of the features in rows is preferred. A partial example of an input file follows (all values are separated by single-tab)::

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

105

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

106 bodysite mucosal mucosal mucosal mucosal mucosal non_mucosal non_mucosal non_mucosal non_mucosal non_mucosal

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

107 subsite oral gut oral oral gut skin nasal skin ear nasal

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

108 id 1023 1023 1672 1876 1672 159005010 1023 1023 1023 1672

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

109 Bacteria 0.99999 0.99999 0.999993 0.999989 0.999997 0.999927 0.999977 0.999987 0.999997 0.999993

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

110 Bacteria|Actinobacteria 0.311037 0.000864363 0.00446132 0.0312045 0.000773642 0.359354 0.761108 0.603002 0.95913 0.753688

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

111 Bacteria|Bacteroidetes 0.0689602 0.804293 0.00983343 0.0303561 0.859838 0.0195298 0.0212741 0.145729 0.0115617 0.0114511

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

112 Bacteria|Firmicutes 0.494223 0.173411 0.715345 0.813046 0.124552 0.177961 0.189178 0.188964 0.0226835 0.192665

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

113 Bacteria|Proteobacteria 0.0914284 0.0180378 0.265664 0.109549 0.00941215 0.430869 0.0225884 0.0532684 0.00512034 0.0365453

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

114 Bacteria|Firmicutes|Clostridia 0.090041 0.170246 0.00483188 0.0465328 0.122702 0.0402301 0.0460614 0.135201 0.0115835 0.0537381

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

115

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

116 In this case one may want to use bodysite as class, subsite as subclass and id as subject. Notice that the features have a hierarchical structure specified using the character \|.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

117

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

118

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

119 **Input file sample**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

120

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

121 You can try the LEfSe modules using the dataset available here_. You can upload the dataset using Galaxy's **Get-Data / Upload File**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

122

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

123 This is a 16S dataset from `(Garrett et. al 2010)`_ and `(Veiga et. al 2010)`_ for studying the characteristics of the fecal microbiota in a mouse model of spontaneous colitis. The dataset contains 30 abundance profiles (obtained by processing the 16S reads with RDP) belonging to 10 rag2 (control) and 20 truc (case) mice. The metadata consists of class information only, as we don't have subject or subclass information. The same dataset is used to show the graphical results in the module descriptions.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

124

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

125

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

126

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

127 ------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

128

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

129 STEP A:

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

130 -------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

131

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

132

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

133 **What STEP A does**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

134

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

135 Preprocessing module for the biomarker discovery tool called LEfSe:

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

136

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

137 This module of LEfSe preprocesses metagenomic abundance data for the analyses to be carried out with the "Run LEfSe" module. This module is separated from the "Run LEfSe" because one may want to preprocess the data only once but run multiple analyses.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

138

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

139 For an overview of LEfSe please refer to the "Introduction" module or to `(Segata et. al 2011)`_.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

140

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

141 ------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

142

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

143 **Input format**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

144

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

145 The module accepts tabular data with the feature list in rows or columns.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

146

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

147 ------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

148

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

149 **Output format**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

150

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

151 The module generates data readable by the "Run LEfSe" module only.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

152

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

153 ------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

154

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

155 **Parameters**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

156

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

157 The class vector represents the labels of the main condition under investigation. The (optional) subclass vector denotes the internal groupings with biological meaning inside each class (subclasses of different classes with the same name are processed as different subclasses). The subject vector (optional) reports a third dimension denoting meta-data (subject id, sample type, ... ) which is independent from the class and subclass definition.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

158

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

159 The labels can have a hierarchical organization (see example below) reflecting taxonomies (like NCBI or RDP microbial taxonomy, SEED subsystems or GO terms). The taxonomic levels are specified using the character \|.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

160

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

161 The per-sample normalization is usually applied for metagenomic data in which the relative abundances are taken into account.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

162

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

163 ------

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

164

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

165 **Example**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

166

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

167 Although both column and row feature organization is accepted, given the high-dimensional nature of metagenomic data, the listing of the features in rows is preferred. A partial example of an input file follows (all values are separated by single-tab)::

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

168

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

169 bodysite mucosal mucosal mucosal mucosal mucosal non_mucosal non_mucosal non_mucosal non_mucosal non_mucosal

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

170 subsite oral gut oral oral gut skin nasal skin ear nasal

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

171 id 1023 1023 1672 1876 1672 159005010 1023 1023 1023 1672

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

172 Bacteria 0.99999 0.99999 0.999993 0.999989 0.999997 0.999927 0.999977 0.999987 0.999997 0.999993

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

173 Bacteria|Actinobacteria 0.311037 0.000864363 0.00446132 0.0312045 0.000773642 0.359354 0.761108 0.603002 0.95913 0.753688

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

174 Bacteria|Bacteroidetes 0.0689602 0.804293 0.00983343 0.0303561 0.859838 0.0195298 0.0212741 0.145729 0.0115617 0.0114511

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

175 Bacteria|Firmicutes 0.494223 0.173411 0.715345 0.813046 0.124552 0.177961 0.189178 0.188964 0.0226835 0.192665

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

176 Bacteria|Proteobacteria 0.0914284 0.0180378 0.265664 0.109549 0.00941215 0.430869 0.0225884 0.0532684 0.00512034 0.0365453

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

177 Bacteria|Firmicutes|Clostridia 0.090041 0.170246 0.00483188 0.0465328 0.122702 0.0402301 0.0460614 0.135201 0.0115835 0.0537381

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

178

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

179 In this case one may want to use bodysite as class, subsite as subclass and id as subject. Notice that the features have a hierarchical structure specified using the character \|.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

180

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

181 **Example with the "mouse model dataset"**

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

182

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

183 You can try the LEfSe modules using the dataset available here_. This is a 16S dataset from `(Garrett et. al 2010)`_ and `(Veiga et. al 2010)`_ for studying the characteristics of the fecal microbiota in a mouse model of spontaneous colitis. The dataset contains 30 abundance profiles (obtained by processing the 16S reads with RDP) belonging to 10 rag2 (control) and 20 truc (case) mice. The metadata consists of class information only, as we don't have subject or subclass information. The dataset contains the features organized in rows; you need to select the first row as class, whereas you have to select the "no subclass" and "no subject" options.

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

184

e7cd19afda2e Lefse

george-weingart

parents:

diff changeset

185

e7cd19afda2e Lefse

george-weingart