annotate src/lib/AnalysisModules.R @ 0:e0b5980139d9

maaslin
author george-weingart
date Tue, 13 May 2014 22:00:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1 #####################################################################################
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
2 #Copyright (C) <2012>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
3 #
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
5 #this software and associated documentation files (the "Software"), to deal in the
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
6 #Software without restriction, including without limitation the rights to use, copy,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
8 #and to permit persons to whom the Software is furnished to do so, subject to
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
9 #the following conditions:
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
10 #
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
11 #The above copyright notice and this permission notice shall be included in all copies
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
12 #or substantial portions of the Software.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
13 #
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
20 #
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
22 # authored by the Huttenhower lab at the Harvard School of Public Health
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
24 #####################################################################################
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
25
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
26 inlinedocs <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
28 ##description<< Allows one to plug in new modules to perform analysis (univariate or multivariate), regularization, and data (response) transformation.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
29 ) { return( pArgs ) }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
30
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
31 # Libraries
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
32 suppressMessages(library( agricolae, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
33 # Needed for the pot-hoc Kruskal wallis comparisons
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
34 suppressMessages(library( penalized, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
35 # Needed for stepAIC
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
36 suppressMessages(library( MASS, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
37 # Needed for na action behavior
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
38 suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
39 # Needed for boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
40 suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
41 # Needed for LASSO
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
42 suppressMessages(library( glmnet, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
43 # Needed for mixed models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
44 #suppressMessages(library( lme4, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
45 suppressMessages(library( nlme, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
46
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
47 # Needed for zero inflated models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
48 #suppressMessages(library( MCMCglmm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
49 suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
50 suppressMessages(library( gamlss, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
51 # Do not use #suppressMessages(library( glmmADMB, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
52
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
53 fAddBack = TRUE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
54 dUnevenMax = .9
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
55
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
56
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
57 ### Helper functions
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
58 # OK
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
59 funcMakeContrasts <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
60 ### Makes univariate contrasts of all predictors in the model formula with the response.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
61 (strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
62 ### lm style string defining reponse and predictors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
63 strRandomFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
64 ### mixed model string defining the fixed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
65 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
66 ### The data frame to find predictor data in
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
67 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
68 ### Taxon
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
69 functionContrast,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
70 ### functionContrast The univariate test to perform
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
71 lsQCCounts
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
72 ### QC info
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
73 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
74 #TODO are we updating the QCCounts?
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
75 lsSig = list()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
76 ### Holds all the significance results from the tests
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
77 adP = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
78 ### Holds the p-values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
79 sCurDataName = names(frmeTmp)[iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
80 ### The name of the taxon (data row) that is being associated (always assumed to be numeric)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
81 #Get test comparisons (predictor names from formula string)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
82 asComparisons = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
83
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
84 #Change metadata in formula to univariate comparisons
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
85 for(sComparison in asComparisons)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
86 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
87 # Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
88 vxTest = frmeTmp[[sComparison]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
89
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
90 # Get the levels in the comparison
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
91 # Can ignore the first level because it is the reference level
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
92 asLevels = sComparison
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
93 if(is.factor(vxTest)){asLevels = levels(vxTest)[2:length(vxTest)]}
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
94
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
95 lasComparisonResults = functionContrast(x=sComparison, adCur=frmeTmp[[sCurDataName]], dfData=frmeTmp)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
96 for(asComparison in lasComparisonResults)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
97 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
98 if( is.na( asComparison$p.value ) ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
99 # Get pvalue
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
100 adP = c(adP, asComparison$p.value)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
101 # Get SD, if not available, give SD of covariate
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
102 dSTD = asComparison$SD
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
103 # TODO Is using sd on factor and binary data correct?
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
104 if(is.na(dSTD) || is.null(dSTD)){dSTD = sd(vxTest)}
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
105
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
106 lsSig[[length( lsSig ) + 1]] <- list(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
107 #Current metadata name (string column name) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
108 name = sComparison,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
109 #Current metadatda name (string, as a factor level if existing as such) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
110 orig = asComparison$name,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
111 #Taxon feature name (string) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
112 taxon = colnames( frmeTmp )[iTaxon],
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
113 #Taxon data / response (double vector) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
114 data = frmeTmp[,iTaxon],
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
115 #Name of column ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
116 factors = sComparison,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
117 #Metadata values (metadata as a factor or raw numeric) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
118 metadata = vxTest,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
119 #Current coefficient value (named coef value with level name (from coefs) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
120 value = asComparison$coef,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
121 #Standard deviation (numeric) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
122 std = dSTD,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
123 #Model coefficients (output from coefs with intercept) ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
124 allCoefs = asComparison$coef)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
125 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
126 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
127 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
128 ### Returns a list of p-value, standard deviation, and comparison which produced the p-value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
129 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
130
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
131 #Ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
132 funcGetStepPredictors <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
133 ### Retrieve the predictors of the reduced model after stepwise model selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
134 (lmod,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
135 ### Linear model resulting from step-wise model selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
136 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
137 strLog
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
138 ### File to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
139 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
140 #Document
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
141 funcWrite( "#model", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
142 funcWrite( lmod$fit, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
143 #TODO funcWrite( lmod$train.error, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
144 #TODO funcWrite( lmod$Terms, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
145 funcWrite( "#summary-gbm", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
146 funcWrite( summary(lmod), strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
147
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
148 #Get Names from coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
149 asStepCoefsFactors = coefficients(lmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
150 astrCoefNames = setdiff(names(asStepCoefsFactors[as.vector(!is.na(asStepCoefsFactors))==TRUE]),"(Intercept)")
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
151 asStepCoefsFactors = unique(as.vector(sapply(astrCoefNames,funcCoef2Col, frmeData=frmeTmp)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
152
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
153 if(length(asStepCoefsFactors)<1){ return(NA) }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
154 return( asStepCoefsFactors )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
155 ### Vector of string predictor names
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
156 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
157
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
158 funcGetUnivariateResults <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
159 ### Reduce the list of list of results to the correct format
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
160 ( llmod,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
161 ### The list of univariate models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
162 frmeData,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
163 ### Data analysis is performed on
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
164 liTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
165 ### The response id
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
166 dSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
167 ### Significance level for q-values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
168 adP,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
169 ### List of pvalues from all associations performed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
170 lsSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
171 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
172 strLog,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
173 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
174 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
175 ### Records of counts associated with quality control
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
176 lastrCols,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
177 ### Predictors used in the association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
178 asSuppressCovariates=c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
179 ### Vector of covariates to suppress and not give results for
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
180 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
181 adP = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
182 lsSig = list()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
183 for(lmod in llmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
184 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
185 adP = c(adP,lmod$adP)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
186 lsSig = c(lsSig,lmod$lsSig)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
187 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
188 return(list(adP=adP, lsSig=lsSig, lsQCCounts=llmod[length(llmod)]$lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
189 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
190
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
191 # OK
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
192 funcGetLMResults <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
193 ### Reduce the lm object return to just the data needed for further analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
194 ( llmod,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
195 ### The result from a linear model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
196 frmeData,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
197 ### Data analysis is performed on
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
198 liTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
199 ### The response id
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
200 dSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
201 ### Significance level for q-values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
202 adP,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
203 ### List of pvalues from all associations performed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
204 lsSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
205 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
206 strLog,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
207 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
208 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
209 ### Records of counts associated with quality control
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
210 lastrCols,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
211 ### Predictors used in the association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
212 asSuppressCovariates=c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
213 ### Vector of covariates to suppress and not give results for
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
214 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
215 ilmodIndex = 0
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
216 for( lmod in llmod )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
217 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
218 ilmodIndex = ilmodIndex + 1
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
219 lmod = llmod[[ ilmodIndex ]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
220 iTaxon = liTaxon[[ ilmodIndex ]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
221 astrCols = lastrCols[[ ilmodIndex ]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
222
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
223 #Exclude none and errors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
224 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
225 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
226 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
227 iPValuePosition = 4
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
228
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
229 #Get the column name of the iTaxon index
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
230 #frmeTmp needs to be what?
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
231 strTaxon = colnames( frmeData )[iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
232 #Get summary information from the linear model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
233 lsSum = try( summary( lmod ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
234 #The following can actually happen when the stranger regressors return broken results
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
235 if( class( lsSum ) == "try-error" )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
236 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
237 next
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
238 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
239
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
240 #Write summary information to log file
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
241 funcWrite( "#model", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
242 funcWrite( lmod, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
243 funcWrite( "#summary", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
244 #Unbelievably, some of the more unusual regression methods crash out when _printing_ their results
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
245 try( funcWrite( lsSum, strLog ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
246
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
247 #Get the coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
248 #This should work for linear models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
249 frmeCoefs <- try( coefficients( lsSum ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
250
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
251 if( ( class(frmeCoefs ) == "try-error" ) || is.null( frmeCoefs ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
252 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
253 adCoefs = try( coefficients( lmod ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
254 if(class( adCoefs ) == "try-error")
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
255 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
256 adCoefs = coef(lmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
257 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
258 frmeCoefs <- NA
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
259 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
260 if( class( frmeCoefs ) == "list" )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
261 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
262 frmeCoefs <- frmeCoefs$count
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
263 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
264 adCoefs = frmeCoefs[,1]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
265 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
266
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
267 #Go through each coefficient
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
268 astrRows <- names( adCoefs )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
269
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
270 ##lmm
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
271 if( is.null( astrRows ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
272 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
273 astrRows = rownames( lsSum$tTable )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
274 frmeCoefs = lsSum$tTable
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
275 iPValuePosition = 5
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
276 adCoefs = frmeCoefs[,1]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
277 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
278
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
279 for( iMetadata in 1:length( astrRows ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
280 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
281 #Current coef which is being evaluated
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
282 strOrig = astrRows[ iMetadata ]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
283 #Skip y interscept
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
284 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
285 #Skip suppressed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
286 if( funcCoef2Col( strOrig, frmeData ) %in% asSuppressCovariates){ next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
287
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
288 #Extract pvalue and std in standard model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
289 dP = frmeCoefs[ strOrig, iPValuePosition ]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
290 dStd = frmeCoefs[ strOrig, 2 ]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
291
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
292 #Attempt to extract the pvalue and std in mixed effects summary
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
293 #Could not get the pvalue so skip result
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
294 if( is.nan( dP ) || is.na( dP ) || is.null( dP ) ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
295
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
296 dCoef = adCoefs[ iMetadata ]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
297
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
298 #Setting adMetadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
299 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
300 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
301 if( is.na( strMetadata ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
302 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
303 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
304 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
305 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
306 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
307 adMetadata <- frmeData[, strMetadata ]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
308
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
309 # Store (factor level modified) p-value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
310 # Store general results for each coef
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
311 adP <- c( adP, dP )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
312
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
313 # Append to the list of information about associations
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
314 lsSig[[ length( lsSig ) + 1 ]] <- list(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
315 # Current metadata name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
316 name = strMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
317 # Current metadatda name (as a factor level if existing as such)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
318 orig = strOrig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
319 # Taxon feature name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
320 taxon = strTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
321 # Taxon data / response
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
322 data = frmeData[, iTaxon ],
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
323 # All levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
324 factors = c( strMetadata ),
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
325 # Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
326 metadata = adMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
327 # Current coeficient value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
328 value = dCoef,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
329 # Standard deviation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
330 std = dStd,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
331 # Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
332 allCoefs = adCoefs )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
333 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
334 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
335 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
336 return( list( adP = adP, lsSig = lsSig, lsQCCounts = lsQCCounts ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
337 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
338 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
339
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
340 funcGetZeroInflatedResults <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
341 ### Reduce the lm object return to just the data needed for further analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
342 ( llmod,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
343 ### The result from a linear model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
344 frmeData,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
345 ### Data analysis is performed on
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
346 liTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
347 ### The response id
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
348 dSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
349 ### Significance level for q-values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
350 adP,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
351 ### List of pvalues from all associations performed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
352 lsSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
353 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
354 strLog,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
355 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
356 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
357 ### Records of counts associated with quality control
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
358 lastrCols,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
359 ### Predictors used in the association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
360 asSuppressCovariates=c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
361 ### Vector of covariates to suppress and not give results for
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
362 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
363 ilmodIndex = 0
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
364 for(lmod in llmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
365 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
366 ilmodIndex = ilmodIndex + 1
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
367 lmod = llmod[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
368 iTaxon = liTaxon[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
369 astrCols = lastrCols[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
370
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
371 #Exclude none and errors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
372 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
373 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
374 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
375 iPValuePosition = 4
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
376
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
377 #Get the column name of the iTaxon index
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
378 #frmeTmp needs to be what?
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
379 strTaxon = colnames( frmeData )[iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
380
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
381 #Write summary information to log file
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
382 funcWrite( "#model", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
383 funcWrite( lmod, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
384
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
385 #Get the coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
386 #This should work for linear models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
387 frmeCoefs <- summary(lmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
388 if(! is.null( frmeCoefs$coefficients$count ) ) # Needed for zeroinfl
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
389 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
390 frmeCoefs = frmeCoefs$coefficients$count
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
391 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
392 adCoefs = frmeCoefs[,1]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
393 names(adCoefs) = row.names(frmeCoefs)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
394
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
395 funcWrite( "#Coefs", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
396 funcWrite( frmeCoefs, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
397
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
398 #Go through each coefficient
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
399 astrRows <- row.names( frmeCoefs )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
400
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
401 for( iMetadata in 1:length( astrRows ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
402 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
403 #Current coef which is being evaluated
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
404 strOrig = astrRows[iMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
405 #Skip y interscept
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
406 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
407 #Skip suppressed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
408 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
409
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
410 #Extract pvalue and std in standard model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
411 dP = frmeCoefs[strOrig, iPValuePosition]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
412 if(is.nan(dP)){next}
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
413 dStd = frmeCoefs[strOrig,2]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
414
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
415 dCoef = adCoefs[iMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
416
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
417 #Setting adMetadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
418 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
419 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
420 if( is.na( strMetadata ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
421 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
422 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
423 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
424 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
425 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
426 adMetadata <- frmeData[,strMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
427
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
428 #Store (factor level modified) p-value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
429 #Store general results for each coef
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
430 adP <- c(adP, dP)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
431 lsSig[[length( lsSig ) + 1]] <- list(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
432 #Current metadata name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
433 name = strMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
434 #Current metadatda name (as a factor level if existing as such)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
435 orig = strOrig,#
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
436 #Taxon feature name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
437 taxon = strTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
438 #Taxon data / response
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
439 data = frmeData[,iTaxon],
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
440 #All levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
441 factors = c(strMetadata),
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
442 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
443 metadata = adMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
444 #Current coeficient value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
445 value = dCoef,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
446 #Standard deviation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
447 std = dStd,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
448 #Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
449 allCoefs = adCoefs)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
450 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
451 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
452 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
453 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
454 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
455 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
456
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
457 oldfuncGetZeroInflatedResults <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
458 ### Reduce the lm object return to just the data needed for further analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
459 ( llmod,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
460 ### The result from a linear model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
461 frmeData,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
462 ### Data analysis is performed on
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
463 liTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
464 ### The response id
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
465 dSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
466 ### Significance level for q-values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
467 adP,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
468 ### List of pvalues from all associations performed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
469 lsSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
470 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
471 strLog,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
472 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
473 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
474 ### Records of counts associated with quality control
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
475 lastrCols,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
476 ### Predictors used in the association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
477 asSuppressCovariates=c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
478 ### Vector of covariates to suppress and not give results for
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
479 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
480 ilmodIndex = 0
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
481 for(lmod in llmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
482 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
483 ilmodIndex = ilmodIndex + 1
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
484 lmod = llmod[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
485 iTaxon = liTaxon[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
486 astrCols = lastrCols[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
487
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
488 #Exclude none and errors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
489 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
490 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
491 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
492 iPValuePosition = 4
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
493
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
494 #Get the column name of the iTaxon index
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
495 #frmeTmp needs to be what?
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
496 strTaxon = colnames( frmeData )[iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
497
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
498 #Write summary information to log file
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
499 funcWrite( "#model", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
500 funcWrite( lmod, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
501
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
502 #Get the coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
503 #This should work for linear models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
504 frmeCoefs <- summary(lmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
505 adCoefs = frmeCoefs[,1]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
506 names(adCoefs) = row.names(frmeCoefs)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
507
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
508 #Go through each coefficient
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
509 astrRows <- row.names( frmeCoefs )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
510
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
511 for( iMetadata in 1:length( astrRows ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
512 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
513 #Current coef which is being evaluated
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
514 strOrig = astrRows[iMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
515 #Skip y interscept
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
516 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
517 #Skip suppressed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
518 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
519
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
520 #Extract pvalue and std in standard model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
521 dP = frmeCoefs[strOrig, iPValuePosition]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
522 dStd = frmeCoefs[strOrig,2]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
523
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
524 dCoef = adCoefs[iMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
525
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
526 #Setting adMetadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
527 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
528 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
529 if( is.na( strMetadata ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
530 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
531 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
532 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
533 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
534 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
535 adMetadata <- frmeData[,strMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
536
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
537 #Store (factor level modified) p-value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
538 #Store general results for each coef
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
539 adP <- c(adP, dP)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
540 lsSig[[length( lsSig ) + 1]] <- list(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
541 #Current metadata name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
542 name = strMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
543 #Current metadatda name (as a factor level if existing as such)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
544 orig = strOrig,#
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
545 #Taxon feature name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
546 taxon = strTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
547 #Taxon data / response
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
548 data = frmeData[,iTaxon],
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
549 #All levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
550 factors = c(strMetadata),
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
551 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
552 metadata = adMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
553 #Current coeficient value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
554 value = dCoef,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
555 #Standard deviation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
556 std = dStd,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
557 #Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
558 allCoefs = adCoefs)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
559 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
560 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
561 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
562 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
563 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
564 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
565
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
566
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
567 notfuncGetZeroInflatedResults <- function
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
568 ### Reduce the lm object return to just the data needed for further analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
569 ( llmod,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
570 ### The result from a linear model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
571 frmeData,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
572 ### Data analysis is performed on
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
573 liTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
574 ### The response id
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
575 dSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
576 ### Significance level for q-values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
577 adP,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
578 ### List of pvalues from all associations performed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
579 lsSig,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
580 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
581 strLog,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
582 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
583 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
584 ### Records of counts associated with quality control
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
585 lastrCols,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
586 ### Predictors used in the association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
587 asSuppressCovariates=c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
588 ### Vector of covariates to suppress and not give results for
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
589 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
590 ilmodIndex = 0
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
591 for(lmod in llmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
592 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
593 ilmodIndex = ilmodIndex + 1
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
594 lmod = llmod[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
595 iTaxon = liTaxon[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
596 astrCols = lastrCols[[ilmodIndex]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
597
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
598 #Exclude none and errors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
599 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
600 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
601 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
602 iPValuePosition = 4
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
603
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
604 #Get the column name of the iTaxon index
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
605 #frmeTmp needs to be what?
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
606 strTaxon = colnames( frmeData )[iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
607
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
608 #Write summary information to log file
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
609 funcWrite( "#model", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
610 funcWrite( lmod, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
611
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
612 #Get the coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
613 #This should work for linear models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
614 frmeCoefs <- summary(lmod)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
615 frmeCoefs = frmeCoefs$coefficients$count
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
616 funcWrite( "#Coefs", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
617 funcWrite( frmeCoefs, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
618
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
619 adCoefs = frmeCoefs[,1]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
620 names(adCoefs) = row.names(frmeCoefs)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
621
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
622 #Go through each coefficient
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
623 astrRows <- row.names( frmeCoefs )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
624
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
625 for( iMetadata in 1:length( astrRows ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
626 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
627 #Current coef which is being evaluated
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
628 strOrig = astrRows[iMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
629
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
630 #Skip y interscept
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
631 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
632 #Skip suppressed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
633 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
634
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
635 #Extract pvalue and std in standard model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
636 dP = frmeCoefs[strOrig, iPValuePosition]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
637 if(is.nan(dP)){ next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
638 dStd = frmeCoefs[strOrig,2]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
639 dCoef = adCoefs[iMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
640
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
641 #Setting adMetadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
642 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
643 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
644 if( is.na( strMetadata ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
645 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
646 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
647 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
648 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
649 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
650 adMetadata <- frmeData[,strMetadata]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
651
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
652 #Store (factor level modified) p-value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
653 #Store general results for each coef
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
654 adP <- c(adP, dP)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
655 lsSig[[length( lsSig ) + 1]] <- list(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
656 #Current metadata name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
657 name = strMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
658 #Current metadatda name (as a factor level if existing as such)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
659 orig = strOrig,#
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
660 #Taxon feature name
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
661 taxon = strTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
662 #Taxon data / response
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
663 data = frmeData[,iTaxon],
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
664 #All levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
665 factors = c(strMetadata),
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
666 #Metadata values
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
667 metadata = adMetadata,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
668 #Current coeficient value
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
669 value = dCoef,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
670 #Standard deviation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
671 std = dStd,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
672 #Model coefficients
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
673 allCoefs = adCoefs)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
674 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
675 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
676 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
677 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
678 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
679 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
680
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
681
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
682 ### Options for variable selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
683 # OK
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
684 funcBoostModel <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
685 ### Perform model selection / regularization with boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
686 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
687 ### The formula of the full model before boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
688 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
689 ### The data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
690 adCur,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
691 ### The response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
692 lsParameters,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
693 ### User controlled parameters needed specific to boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
694 lsForcedParameters = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
695 ### Force these predictors to be in the model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
696 strLog
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
697 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
698 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
699 funcWrite( c("#Boost formula", strFormula), strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
700 lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="laplace", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
701 #TODO# lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="gaussian", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
702
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
703 astrTerms <- c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
704 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
705 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
706 #Get boosting summary results
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
707 lsSum <- summary( lmod, plotit = FALSE )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
708
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
709 #Document
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
710 funcWrite( "#model-gbm", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
711 funcWrite( lmod$fit, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
712 funcWrite( lmod$train.error, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
713 funcWrite( lmod$Terms, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
714 funcWrite( "#summary-gbm", strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
715 funcWrite( lsSum, strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
716
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
717 # Uneven metadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
718 vstrUneven = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
719 # Kept metadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
720 vstrKeepMetadata = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
721
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
722 #Select model predictors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
723 #Check the frequency of selection and skip if not selected more than set threshold dFreq
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
724 for( strMetadata in lmod$var.names )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
725 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
726 #Get the name of the metadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
727 strTerm <- funcCoef2Col( strMetadata, frmeTmp, c(astrMetadata, astrGenetics) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
728
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
729 #Add back in uneven metadata
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
730 if(fAddBack)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
731 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
732 ldMetadata = frmeTmp[[strMetadata]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
733 if(length(which(table(ldMetadata)/length(ldMetadata)>dUnevenMax))>0)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
734 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
735 astrTerms <- c(astrTerms, strTerm)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
736 vstrUneven = c(vstrUneven,strMetadata)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
737 next
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
738 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
739 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
740
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
741 #If the selprob is less than a certain frequency, skip
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
742 dSel <- lsSum$rel.inf[which( lsSum$var == strMetadata )] / 100
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
743 if( is.na(dSel) || ( dSel <= lsParameters$dFreq ) ){ next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
744 #TODO# if( is.na(dSel) || ( dSel < lsParameters$dFreq ) ){ next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
745
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
746 #If you should ignore the metadata, continue
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
747 if( is.null( strTerm ) ) { next }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
748
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
749 #If you cant find the metadata name, write
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
750 if( is.na( strTerm ) )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
751 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
752 c_logrMaaslin$error( "Unknown coefficient: %s", strMetadata )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
753 next
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
754 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
755
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
756 #Collect metadata names
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
757 astrTerms <- c(astrTerms, strTerm)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
758 vstrKeepMetadata = c(vstrKeepMetadata,strTerm)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
759 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
760 } else { astrTerms = lsForcedParameters }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
761
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
762 # funcBoostInfluencePlot(vdRelInf=lsSum$rel.inf, sFeature=lsParameters$sBugName, vsPredictorNames=lsSum$var, vstrKeepMetadata=vstrKeepMetadata, vstrUneven=vstrUneven)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
763
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
764 return(unique(c(astrTerms,lsForcedParameters)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
765 ### Return a vector of predictor names to use in a reduced model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
766 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
767
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
768 #Glmnet default is to standardize the variables.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
769 #used as an example for implementation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
770 #http://r.789695.n4.nabble.com/estimating-survival-times-with-glmnet-and-coxph-td4614225.html
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
771 funcPenalizedModel <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
772 ### Perform penalized regularization for variable selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
773 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
774 ### The formula of the full model before boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
775 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
776 ### The data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
777 adCur,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
778 ### The response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
779 lsParameters,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
780 ### User controlled parameters needed specific to boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
781 lsForcedParameters = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
782 ### Force these predictors to be in the model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
783 strLog
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
784 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
785 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
786 #Convert the data frame to a model matrix
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
787 mtrxDesign = model.matrix(as.formula(strFormula), data=frmeTmp)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
788
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
789 #Cross validate the lambda
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
790 cvRet = cv.glmnet(x=mtrxDesign,y=adCur,alpha=lsParameters$dPAlpha)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
791
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
792 #Perform lasso
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
793 glmnetMod = glmnet(x=mtrxDesign,y=adCur,family=lsParameters$family,alpha=lsParameters$dPAlpha,lambda=cvRet$lambda.min)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
794
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
795 #Get non zero beta and return column names for covariate names.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
796 ldBeta = glmnetMod$beta[,which.max(glmnetMod$dev.ratio)]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
797 ldBeta = names(ldBeta[which(abs(ldBeta)>0)])
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
798 return(sapply(ldBeta,funcCoef2Col,frmeData=frmeTmp))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
799 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
800
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
801 # OK
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
802 funcForwardModel <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
803 ### Perform model selection with forward stepwise selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
804 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
805 ### lm style string defining reposne and predictors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
806 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
807 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
808 adCur,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
809 ### Response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
810 lsParameters,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
811 ### User controlled parameters needed specific to boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
812 lsForcedParameters = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
813 ### Force these predictors to be in the model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
814 strLog
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
815 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
816 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
817 funcWrite( c("#Forward formula", strFormula), strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
818
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
819 strNullFormula = "adCur ~ 1"
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
820 if(!is.null(lsForcedParameters))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
821 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
822 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
823 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
824 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
825 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
826 if(!("try-error" %in% c(class( lmodNull ),class( lmodFull ))))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
827 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
828 lmod = stepAIC(lmodNull, scope=list(lower=lmodNull,upper=lmodFull), direction="forward", trace=0)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
829 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
830 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
831 return( lsForcedParameters )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
832 ### Return a vector of predictor names to use in a reduced model or NA on error
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
833 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
834
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
835 # OK
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
836 # Select model with backwards selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
837 funcBackwardsModel <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
838 ### Perform model selection with backwards stepwise selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
839 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
840 ### lm style string defining reponse and predictors
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
841 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
842 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
843 adCur,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
844 ### Response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
845 lsParameters,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
846 ### User controlled parameters needed specific to boosting
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
847 lsForcedParameters = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
848 ### Force these predictors to be in the model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
849 strLog
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
850 ### File to which to document logging
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
851 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
852 funcWrite( c("#Backwards formula", strFormula), strLog )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
853
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
854 strNullFormula = "adCur ~ 1"
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
855 if(!is.null(lsForcedParameters))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
856 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
857 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
858 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
859
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
860 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
861 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
862
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
863 if(! class( lmodFull ) == "try-error" )
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
864 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
865 lmod = stepAIC(lmodFull, scope=list(lower=lmodNull, upper=lmodFull), direction="backward")
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
866 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
867 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
868 return( lsForcedParameters ) }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
869 ### Return a vector of predictor names to use in a reduced model or NA on error
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
870 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
871
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
872 ### Analysis methods
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
873 ### Univariate options
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
874
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
875 # Sparse Dir. Model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
876 #TODO# Implement in sfle
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
877
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
878 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
879 # Correlation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
880 # NOTE: Ignores the idea of random and fixed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
881 funcSpearman <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
882 ### Perform multiple univariate comparisons producing spearman correlations for association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
883 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
884 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
885 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
886 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
887 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
888 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
889 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
890 ### List recording anything important to QC
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
891 strRandomFormula = NULL
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
892 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
893 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
894 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
895 functionContrast=function(x,adCur,dfData)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
896 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
897 retList = list()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
898 ret = cor.test(as.formula(paste("~",x,"+ adCur")), data=dfData, method="spearman", na.action=c_strNA_Action)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
899 #Returning rho for the coef in a named vector
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
900 vdCoef = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
901 vdCoef[[x]]=ret$estimate
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
902 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
903 return(retList)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
904 }, lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
905 ### List of contrast information, pvalue, contrast and std per univariate test
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
906 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
907
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
908 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
909 # Wilcoxon (T-Test)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
910 # NOTE: Ignores the idea of random and fixed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
911 funcWilcoxon <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
912 ### Perform multiple univariate comparisons performing wilcoxon tests on discontinuous data with 2 levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
913 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
914 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
915 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
916 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
917 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
918 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
919 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
920 ### List recording anything important to QC
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
921 strRandomFormula = NULL
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
922 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
923 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
924 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
925 functionContrast=function(x,adCur,dfData)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
926 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
927 retList = list()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
928 ret = wilcox.test(as.formula(paste("adCur",x,sep=" ~ ")), data=dfData, na.action=c_strNA_Action)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
929 #Returning NA for the coef in a named vector
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
930 vdCoef = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
931 vdCoef[[x]]=ret$statistic
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
932 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
933 return(retList)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
934 }, lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
935 ### List of contrast information, pvalue, contrast and std per univariate test
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
936 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
937
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
938 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
939 # Kruskal.Wallis (Nonparameteric anova)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
940 # NOTE: Ignores the idea of random and fixed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
941 funcKruskalWallis <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
942 ### Perform multiple univariate comparisons performing Kruskal wallis rank sum tests on discontuous data with more than 2 levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
943 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
944 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
945 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
946 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
947 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
948 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
949 lsQCCounts,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
950 ### List recording anything important to QC
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
951 strRandomFormula = NULL
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
952 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
953 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
954 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
955 functionContrast=function(x,adCur,dfData)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
956 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
957 retList = list()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
958 lmodKW = kruskal(adCur,dfData[[x]],group=FALSE,p.adj="holm")
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
959
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
960 asLevels = levels(dfData[[x]])
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
961 # The names of the generated comparisons, sometimes the control is first sometimes it is not so
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
962 # We will just check which is in the names and use that
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
963 asComparisons = row.names(lmodKW$comparisons)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
964 #Get the comparison with the control
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
965 for(sLevel in asLevels[2:length(asLevels)])
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
966 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
967 sComparison = intersect(c(paste(asLevels[1],sLevel,sep=" - "),paste(sLevel,asLevels[1],sep=" - ")),asComparisons)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
968 #Returning NA for the coef in a named vector
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
969 vdCoef = c()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
970 vdCoef[[paste(x,sLevel,sep="")]]=lmodKW$comparisons[sComparison,"Difference"]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
971 # vdCoef[[paste(x,sLevel,sep="")]]=NA
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
972 retList[[length(retList)+1]]=list(p.value=lmodKW$comparisons[sComparison,"p.value"],SD=1.0,name=paste(x,sLevel,sep=""),coef=vdCoef)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
973 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
974 return(retList)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
975 }, lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
976 ### List of contrast information, pvalue, contrast and std per univariate test
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
977 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
978
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
979 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
980 # NOTE: Ignores the idea of random and fixed covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
981 funcDoUnivariate <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
982 ### Perform multiple univariate comparisons producing spearman correlations for association
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
983 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
984 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
985 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
986 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
987 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
988 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
989 lsHistory,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
990 ### List recording p-values, association information, and QC counts
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
991 strRandomFormula = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
992 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
993 fZeroInflate = FALSE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
994 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
995 if(fZeroInflate)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
996 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
997 throw("There are no zero-inflated univariate models to perform your analysis.")
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
998 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
999
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1000 # Get covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1001 astrCovariates = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1002
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1003 # For each covariate
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1004 for(sCovariate in astrCovariates)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1005 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1006 ## Check to see if it is discrete
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1007 axData = frmeTmp[[sCovariate]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1008 lsRet = NA
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1009 if(is.factor(axData) || is.logical(axData))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1010 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1011 ## If discrete check how many levels
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1012 lsDataLevels = levels(axData)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1013 ## If 2 levels do wilcoxon test
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1014 if(length(lsDataLevels) < 3)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1015 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1016 lsRet = funcWilcoxon(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1017 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1018 ## If 3 or more levels do kruskal wallis test
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1019 lsRet = funcKruskalWallis(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1020 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1021 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1022 ## If not discrete do spearman test (list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1023 lsRet = funcSpearman(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1024 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1025 lsHistory[["adP"]] = c(lsHistory[["adP"]], lsRet[["adP"]])
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1026 lsHistory[["lsSig"]] = c(lsHistory[["lsSig"]], lsRet[["lsSig"]])
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1027 lsHistory[["lsQCCounts"]] = lsRet[["lsQCCounts"]]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1028 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1029 return(lsHistory)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1030 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1031
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1032 ### Multivariate
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1033
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1034 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1035 funcLM <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1036 ### Perform vanilla linear regression
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1037 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1038 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1039 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1040 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1041 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1042 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1043 lsHistory,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1044 ### List recording p-values, association information, and QC counts
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1045 strRandomFormula = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1046 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1047 fZeroInflated = FALSE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1048 ### Turns on the zero inflated model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1049 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1050 adCur = frmeTmp[,iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1051 if(fZeroInflated)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1052 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1053 return(try(gamlss(formula=as.formula(strFormula), family=BEZI, data=frmeTmp))) # gamlss
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1054 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1055 if(!is.null(strRandomFormula))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1056 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1057 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=gaussian(link="identity"), data=frmeTmp)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1058 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1059 #return(try( lmer(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1060 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1061 return(try( lm(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1062 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1063 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1064 ### lmod result object from lm
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1065 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1066
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1067 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1068 funcBinomialMult <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1069 ### Perform linear regression with negative binomial link
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1070 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1071 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1072 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1073 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1074 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1075 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1076 lsHistory,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1077 ### List recording p-values, association information, and QC counts
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1078 strRandomFormula = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1079 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1080 fZeroInflated = FALSE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1081 ### Turns on the zero inflated model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1082 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1083 adCur = frmeTmp[,iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1084 if(fZeroInflated)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1085 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1086 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="negbin"))) # pscl
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1087 # return(try(gamlss(as.formula(strFormula), family=ZINBI, data=frmeTmp))) # pscl
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1088 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1089 if(!is.null(strRandomFormula))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1090 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1091 throw("This analysis flow is not completely developed, please choose an option other than negative bionomial with random covariates")
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1092 #TODO need to estimate the theta
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1093 #return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=negative.binomial(theta = 2, link=log), data=frmeTmp)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1094 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1095 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1096 return(try( glm.nb(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action )))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1097 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1098 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1099 ### lmod result object from lm
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1100 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1101
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1102 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1103 funcQuasiMult <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1104 ### Perform linear regression with quasi-poisson link
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1105 strFormula,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1106 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1107 frmeTmp,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1108 ### Data on which to perform analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1109 iTaxon,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1110 ### Index of the response data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1111 lsHistory,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1112 ### List recording p-values, association information, and QC counts
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1113 strRandomFormula = NULL,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1114 ### Has the formula for random covariates
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1115 fZeroInflated = FALSE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1116 ### Turns on a zero infalted model
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1117 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1118 adCur = frmeTmp[,iTaxon]
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1119 if(fZeroInflated)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1120 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1121 # return(try(gamlss(formula=as.formula(strFormula), family=ZIP, data=frmeTmp))) # gamlss
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1122 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="poisson"))) # pscl
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1123 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1124 #Check to see if | is in the model, if so use a lmm otherwise the standard glm is ok
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1125 if(!is.null(strRandomFormula))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1126 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1127 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family= quasipoisson, data=frmeTmp)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1128 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1129 #return(try ( glmer(as.formula(strFormula), data=frmeTmp, family=quasipoisson, na.action=c_strNA_Action) ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1130 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1131 return(try( glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action) ))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1132 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1133 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1134 ### lmod result object from lm
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1135 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1136
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1137 ### Transformations
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1138 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1139 funcArcsinSqrt <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1140 # Transform data with arcsin sqrt transformation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1141 aData
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1142 ### The data on which to perform the transformation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1143 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1144 return(asin(sqrt(aData)))
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1145 ### Transformed data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1146 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1147
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1148 funcSquareSin <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1149 # Transform data with square sin transformation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1150 # Opposite of the funcArcsinSqrt
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1151 aData
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1152 ### The data on which to perform the transformation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1153 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1154 return(sin(aData)^2)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1155 ### Transformed data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1156 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1157
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1158 # Tested
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1159 funcNoTransform <-function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1160 ### Pass data without transform
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1161 aData
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1162 ### The data on which to perform the transformation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1163 ### Only given here to preserve the pattern, not used.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1164 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1165 return(aData)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1166 ### Transformed data
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1167 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1168
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1169 funcGetAnalysisMethods <- function(
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1170 ### Returns the appropriate functions for regularization, analysis, data transformation, and analysis object inspection.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1171 ### This allows modular customization per analysis step.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1172 ### To add a new method insert an entry in the switch for either the selection, transform, or method
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1173 ### Insert them by using the pattern optparse_keyword_without_quotes = function_in_AnalysisModules
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1174 ### Order in the return listy is currently set and expected to be selection, transforms/links, analysis method
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1175 ### none returns null
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1176 sModelSelectionKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1177 ### Keyword defining the method of model selection
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1178 sTransformKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1179 ### Keyword defining the method of data transformation
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1180 sMethodKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1181 ### Keyword defining the method of analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1182 fZeroInflated = FALSE
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1183 ### Indicates if using zero inflated models
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1184 ){
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1185 lRetMethods = list()
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1186 #Insert selection methods here
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1187 lRetMethods[[c_iSelection]] = switch(sModelSelectionKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1188 boost = funcBoostModel,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1189 penalized = funcPenalizedModel,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1190 forward = funcForwardModel,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1191 backward = funcBackwardsModel,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1192 none = NA)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1193
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1194 #Insert transforms
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1195 lRetMethods[[c_iTransform]] = switch(sTransformKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1196 asinsqrt = funcArcsinSqrt,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1197 none = funcNoTransform)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1198
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1199 #Insert untransform
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1200 lRetMethods[[c_iUnTransform]] = switch(sTransformKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1201 asinsqrt = funcNoTransform,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1202 none = funcNoTransform)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1203
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1204 #Insert analysis
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1205 lRetMethods[[c_iAnalysis]] = switch(sMethodKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1206 neg_binomial = funcBinomialMult,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1207 quasi = funcQuasiMult,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1208 univariate = funcDoUnivariate,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1209 lm = funcLM,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1210 none = NA)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1211
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1212 # If a univariate method is used it is required to set this to true
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1213 # For correct handling.
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1214 lRetMethods[[c_iIsUnivariate]]=sMethodKey=="univariate"
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1215
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1216 #Insert method to get results
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1217 if(fZeroInflated)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1218 {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1219 lRetMethods[[c_iResults]] = switch(sMethodKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1220 neg_binomial = funcGetZeroInflatedResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1221 quasi = funcGetZeroInflatedResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1222 univariate = funcGetUnivariateResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1223 lm = funcGetZeroInflatedResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1224 none = NA)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1225 } else {
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1226 lRetMethods[[c_iResults]] = switch(sMethodKey,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1227 neg_binomial = funcGetLMResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1228 quasi = funcGetLMResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1229 univariate = funcGetUnivariateResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1230 lm = funcGetLMResults,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1231 none = NA)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1232 }
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1233
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1234 return(lRetMethods)
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1235 ### Returns a list of functions to be passed for regularization, data transformation, analysis,
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1236 ### and custom analysis results introspection functions to pull from return objects data of interest
e0b5980139d9 maaslin
george-weingart
parents:
diff changeset
1237 }