annotate maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/AnalysisModules.R @ 6:ca61989bc3b4

Uploaded
author george-weingart
date Sun, 08 Feb 2015 23:39:43 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1 #####################################################################################
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
2 #Copyright (C) <2012>
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
3 #
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
5 #this software and associated documentation files (the "Software"), to deal in the
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
6 #Software without restriction, including without limitation the rights to use, copy,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
8 #and to permit persons to whom the Software is furnished to do so, subject to
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
9 #the following conditions:
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
10 #
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
11 #The above copyright notice and this permission notice shall be included in all copies
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
12 #or substantial portions of the Software.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
13 #
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
20 #
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
22 # authored by the Huttenhower lab at the Harvard School of Public Health
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
24 #####################################################################################
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
25
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
26 inlinedocs <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
28 ##description<< Allows one to plug in new modules to perform analysis (univariate or multivariate), regularization, and data (response) transformation.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
29 ) { return( pArgs ) }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
30
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
31 # Libraries
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
32 suppressMessages(library( agricolae, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
33 # Needed for the pot-hoc Kruskal wallis comparisons
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
34 suppressMessages(library( penalized, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
35 # Needed for stepAIC
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
36 suppressMessages(library( MASS, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
37 # Needed for na action behavior
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
38 suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
39 # Needed for boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
40 suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
41 # Needed for LASSO
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
42 suppressMessages(library( glmnet, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
43 # Needed for mixed models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
44 #suppressMessages(library( lme4, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
45 suppressMessages(library( nlme, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
46
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
47 # Needed for zero inflated models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
48 #suppressMessages(library( MCMCglmm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
49 suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
50 suppressMessages(library( gamlss, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
51 # Do not use #suppressMessages(library( glmmADMB, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
52
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
53 fAddBack = TRUE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
54 dUnevenMax = .9
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
55
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
56
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
57 ### Helper functions
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
58 # OK
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
59 funcMakeContrasts <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
60 ### Makes univariate contrasts of all predictors in the model formula with the response.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
61 (strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
62 ### lm style string defining reponse and predictors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
63 strRandomFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
64 ### mixed model string defining the fixed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
65 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
66 ### The data frame to find predictor data in
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
67 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
68 ### Taxon
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
69 functionContrast,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
70 ### functionContrast The univariate test to perform
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
71 lsQCCounts
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
72 ### QC info
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
73 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
74 #TODO are we updating the QCCounts?
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
75 lsSig = list()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
76 ### Holds all the significance results from the tests
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
77 adP = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
78 ### Holds the p-values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
79 sCurDataName = names(frmeTmp)[iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
80 ### The name of the taxon (data row) that is being associated (always assumed to be numeric)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
81 #Get test comparisons (predictor names from formula string)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
82 asComparisons = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
83
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
84 #Change metadata in formula to univariate comparisons
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
85 for(sComparison in asComparisons)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
86 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
87 # Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
88 vxTest = frmeTmp[[sComparison]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
89
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
90 # Get the levels in the comparison
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
91 # Can ignore the first level because it is the reference level
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
92 asLevels = sComparison
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
93 if(is.factor(vxTest)){asLevels = levels(vxTest)[2:length(vxTest)]}
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
94
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
95 lasComparisonResults = functionContrast(x=sComparison, adCur=frmeTmp[[sCurDataName]], dfData=frmeTmp)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
96 for(asComparison in lasComparisonResults)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
97 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
98 if( is.na( asComparison$p.value ) ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
99 # Get pvalue
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
100 adP = c(adP, asComparison$p.value)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
101 # Get SD, if not available, give SD of covariate
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
102 dSTD = asComparison$SD
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
103 # TODO Is using sd on factor and binary data correct?
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
104 if(is.na(dSTD) || is.null(dSTD)){dSTD = sd(vxTest)}
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
105
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
106 lsSig[[length( lsSig ) + 1]] <- list(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
107 #Current metadata name (string column name) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
108 name = sComparison,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
109 #Current metadatda name (string, as a factor level if existing as such) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
110 orig = asComparison$name,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
111 #Taxon feature name (string) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
112 taxon = colnames( frmeTmp )[iTaxon],
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
113 #Taxon data / response (double vector) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
114 data = frmeTmp[,iTaxon],
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
115 #Name of column ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
116 factors = sComparison,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
117 #Metadata values (metadata as a factor or raw numeric) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
118 metadata = vxTest,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
119 #Current coefficient value (named coef value with level name (from coefs) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
120 value = asComparison$coef,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
121 #Standard deviation (numeric) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
122 std = dSTD,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
123 #Model coefficients (output from coefs with intercept) ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
124 allCoefs = asComparison$coef)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
125 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
126 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
127 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
128 ### Returns a list of p-value, standard deviation, and comparison which produced the p-value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
129 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
130
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
131 #Ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
132 funcGetStepPredictors <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
133 ### Retrieve the predictors of the reduced model after stepwise model selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
134 (lmod,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
135 ### Linear model resulting from step-wise model selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
136 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
137 strLog
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
138 ### File to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
139 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
140 #Document
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
141 funcWrite( "#model", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
142 funcWrite( lmod$fit, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
143 #TODO funcWrite( lmod$train.error, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
144 #TODO funcWrite( lmod$Terms, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
145 funcWrite( "#summary-gbm", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
146 funcWrite( summary(lmod), strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
147
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
148 #Get Names from coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
149 asStepCoefsFactors = coefficients(lmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
150 astrCoefNames = setdiff(names(asStepCoefsFactors[as.vector(!is.na(asStepCoefsFactors))==TRUE]),"(Intercept)")
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
151 asStepCoefsFactors = unique(as.vector(sapply(astrCoefNames,funcCoef2Col, frmeData=frmeTmp)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
152
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
153 if(length(asStepCoefsFactors)<1){ return(NA) }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
154 return( asStepCoefsFactors )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
155 ### Vector of string predictor names
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
156 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
157
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
158 funcGetUnivariateResults <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
159 ### Reduce the list of list of results to the correct format
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
160 ( llmod,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
161 ### The list of univariate models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
162 frmeData,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
163 ### Data analysis is performed on
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
164 liTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
165 ### The response id
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
166 dSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
167 ### Significance level for q-values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
168 adP,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
169 ### List of pvalues from all associations performed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
170 lsSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
171 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
172 strLog,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
173 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
174 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
175 ### Records of counts associated with quality control
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
176 lastrCols,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
177 ### Predictors used in the association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
178 asSuppressCovariates=c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
179 ### Vector of covariates to suppress and not give results for
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
180 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
181 adP = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
182 lsSig = list()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
183 for(lmod in llmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
184 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
185 adP = c(adP,lmod$adP)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
186 lsSig = c(lsSig,lmod$lsSig)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
187 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
188 return(list(adP=adP, lsSig=lsSig, lsQCCounts=llmod[length(llmod)]$lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
189 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
190
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
191 # OK
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
192 funcGetLMResults <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
193 ### Reduce the lm object return to just the data needed for further analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
194 ( llmod,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
195 ### The result from a linear model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
196 frmeData,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
197 ### Data analysis is performed on
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
198 liTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
199 ### The response id
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
200 dSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
201 ### Significance level for q-values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
202 adP,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
203 ### List of pvalues from all associations performed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
204 lsSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
205 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
206 strLog,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
207 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
208 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
209 ### Records of counts associated with quality control
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
210 lastrCols,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
211 ### Predictors used in the association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
212 asSuppressCovariates=c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
213 ### Vector of covariates to suppress and not give results for
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
214 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
215 ilmodIndex = 0
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
216 for( lmod in llmod )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
217 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
218 ilmodIndex = ilmodIndex + 1
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
219 lmod = llmod[[ ilmodIndex ]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
220 iTaxon = liTaxon[[ ilmodIndex ]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
221 astrCols = lastrCols[[ ilmodIndex ]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
222
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
223 #Exclude none and errors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
224 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
225 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
226 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
227 iPValuePosition = 4
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
228
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
229 #Get the column name of the iTaxon index
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
230 #frmeTmp needs to be what?
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
231 strTaxon = colnames( frmeData )[iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
232 #Get summary information from the linear model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
233 lsSum = try( summary( lmod ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
234 #The following can actually happen when the stranger regressors return broken results
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
235 if( class( lsSum ) == "try-error" )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
236 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
237 next
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
238 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
239
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
240 #Write summary information to log file
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
241 funcWrite( "#model", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
242 funcWrite( lmod, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
243 funcWrite( "#summary", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
244 #Unbelievably, some of the more unusual regression methods crash out when _printing_ their results
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
245 try( funcWrite( lsSum, strLog ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
246
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
247 #Get the coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
248 #This should work for linear models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
249 frmeCoefs <- try( coefficients( lsSum ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
250
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
251 if( ( class(frmeCoefs ) == "try-error" ) || is.null( frmeCoefs ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
252 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
253 adCoefs = try( coefficients( lmod ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
254 if(class( adCoefs ) == "try-error")
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
255 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
256 adCoefs = coef(lmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
257 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
258 frmeCoefs <- NA
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
259 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
260 if( class( frmeCoefs ) == "list" )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
261 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
262 frmeCoefs <- frmeCoefs$count
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
263 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
264 adCoefs = frmeCoefs[,1]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
265 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
266
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
267 #Go through each coefficient
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
268 astrRows <- names( adCoefs )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
269
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
270 ##lmm
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
271 if( is.null( astrRows ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
272 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
273 astrRows = rownames( lsSum$tTable )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
274 frmeCoefs = lsSum$tTable
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
275 iPValuePosition = 5
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
276 adCoefs = frmeCoefs[,1]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
277 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
278
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
279 for( iMetadata in 1:length( astrRows ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
280 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
281 #Current coef which is being evaluated
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
282 strOrig = astrRows[ iMetadata ]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
283 #Skip y interscept
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
284 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
285 #Skip suppressed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
286 if( funcCoef2Col( strOrig, frmeData ) %in% asSuppressCovariates){ next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
287
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
288 #Extract pvalue and std in standard model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
289 dP = frmeCoefs[ strOrig, iPValuePosition ]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
290 dStd = frmeCoefs[ strOrig, 2 ]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
291
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
292 #Attempt to extract the pvalue and std in mixed effects summary
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
293 #Could not get the pvalue so skip result
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
294 if( is.nan( dP ) || is.na( dP ) || is.null( dP ) ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
295
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
296 dCoef = adCoefs[ iMetadata ]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
297
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
298 #Setting adMetadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
299 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
300 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
301 if( is.na( strMetadata ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
302 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
303 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
304 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
305 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
306 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
307 adMetadata <- frmeData[, strMetadata ]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
308
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
309 # Store (factor level modified) p-value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
310 # Store general results for each coef
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
311 adP <- c( adP, dP )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
312
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
313 # Append to the list of information about associations
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
314 lsSig[[ length( lsSig ) + 1 ]] <- list(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
315 # Current metadata name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
316 name = strMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
317 # Current metadatda name (as a factor level if existing as such)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
318 orig = strOrig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
319 # Taxon feature name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
320 taxon = strTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
321 # Taxon data / response
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
322 data = frmeData[, iTaxon ],
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
323 # All levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
324 factors = c( strMetadata ),
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
325 # Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
326 metadata = adMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
327 # Current coeficient value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
328 value = dCoef,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
329 # Standard deviation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
330 std = dStd,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
331 # Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
332 allCoefs = adCoefs )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
333 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
334 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
335 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
336 return( list( adP = adP, lsSig = lsSig, lsQCCounts = lsQCCounts ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
337 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
338 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
339
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
340 funcGetZeroInflatedResults <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
341 ### Reduce the lm object return to just the data needed for further analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
342 ( llmod,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
343 ### The result from a linear model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
344 frmeData,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
345 ### Data analysis is performed on
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
346 liTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
347 ### The response id
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
348 dSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
349 ### Significance level for q-values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
350 adP,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
351 ### List of pvalues from all associations performed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
352 lsSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
353 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
354 strLog,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
355 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
356 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
357 ### Records of counts associated with quality control
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
358 lastrCols,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
359 ### Predictors used in the association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
360 asSuppressCovariates=c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
361 ### Vector of covariates to suppress and not give results for
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
362 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
363 ilmodIndex = 0
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
364 for(lmod in llmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
365 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
366 ilmodIndex = ilmodIndex + 1
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
367 lmod = llmod[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
368 iTaxon = liTaxon[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
369 astrCols = lastrCols[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
370
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
371 #Exclude none and errors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
372 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
373 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
374 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
375 iPValuePosition = 4
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
376
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
377 #Get the column name of the iTaxon index
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
378 #frmeTmp needs to be what?
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
379 strTaxon = colnames( frmeData )[iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
380
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
381 #Write summary information to log file
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
382 funcWrite( "#model", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
383 funcWrite( lmod, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
384
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
385 #Get the coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
386 #This should work for linear models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
387 frmeCoefs <- summary(lmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
388 if(! is.null( frmeCoefs$coefficients$count ) ) # Needed for zeroinfl
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
389 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
390 frmeCoefs = frmeCoefs$coefficients$count
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
391 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
392 adCoefs = frmeCoefs[,1]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
393 names(adCoefs) = row.names(frmeCoefs)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
394
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
395 funcWrite( "#Coefs", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
396 funcWrite( frmeCoefs, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
397
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
398 #Go through each coefficient
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
399 astrRows <- row.names( frmeCoefs )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
400
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
401 for( iMetadata in 1:length( astrRows ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
402 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
403 #Current coef which is being evaluated
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
404 strOrig = astrRows[iMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
405 #Skip y interscept
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
406 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
407 #Skip suppressed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
408 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
409
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
410 #Extract pvalue and std in standard model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
411 dP = frmeCoefs[strOrig, iPValuePosition]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
412 if(is.nan(dP)){next}
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
413 dStd = frmeCoefs[strOrig,2]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
414
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
415 dCoef = adCoefs[iMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
416
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
417 #Setting adMetadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
418 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
419 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
420 if( is.na( strMetadata ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
421 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
422 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
423 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
424 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
425 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
426 adMetadata <- frmeData[,strMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
427
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
428 #Store (factor level modified) p-value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
429 #Store general results for each coef
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
430 adP <- c(adP, dP)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
431 lsSig[[length( lsSig ) + 1]] <- list(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
432 #Current metadata name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
433 name = strMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
434 #Current metadatda name (as a factor level if existing as such)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
435 orig = strOrig,#
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
436 #Taxon feature name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
437 taxon = strTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
438 #Taxon data / response
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
439 data = frmeData[,iTaxon],
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
440 #All levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
441 factors = c(strMetadata),
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
442 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
443 metadata = adMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
444 #Current coeficient value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
445 value = dCoef,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
446 #Standard deviation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
447 std = dStd,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
448 #Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
449 allCoefs = adCoefs)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
450 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
451 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
452 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
453 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
454 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
455 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
456
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
457 oldfuncGetZeroInflatedResults <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
458 ### Reduce the lm object return to just the data needed for further analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
459 ( llmod,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
460 ### The result from a linear model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
461 frmeData,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
462 ### Data analysis is performed on
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
463 liTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
464 ### The response id
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
465 dSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
466 ### Significance level for q-values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
467 adP,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
468 ### List of pvalues from all associations performed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
469 lsSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
470 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
471 strLog,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
472 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
473 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
474 ### Records of counts associated with quality control
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
475 lastrCols,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
476 ### Predictors used in the association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
477 asSuppressCovariates=c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
478 ### Vector of covariates to suppress and not give results for
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
479 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
480 ilmodIndex = 0
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
481 for(lmod in llmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
482 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
483 ilmodIndex = ilmodIndex + 1
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
484 lmod = llmod[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
485 iTaxon = liTaxon[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
486 astrCols = lastrCols[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
487
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
488 #Exclude none and errors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
489 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
490 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
491 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
492 iPValuePosition = 4
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
493
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
494 #Get the column name of the iTaxon index
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
495 #frmeTmp needs to be what?
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
496 strTaxon = colnames( frmeData )[iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
497
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
498 #Write summary information to log file
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
499 funcWrite( "#model", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
500 funcWrite( lmod, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
501
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
502 #Get the coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
503 #This should work for linear models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
504 frmeCoefs <- summary(lmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
505 adCoefs = frmeCoefs[,1]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
506 names(adCoefs) = row.names(frmeCoefs)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
507
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
508 #Go through each coefficient
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
509 astrRows <- row.names( frmeCoefs )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
510
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
511 for( iMetadata in 1:length( astrRows ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
512 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
513 #Current coef which is being evaluated
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
514 strOrig = astrRows[iMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
515 #Skip y interscept
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
516 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
517 #Skip suppressed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
518 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
519
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
520 #Extract pvalue and std in standard model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
521 dP = frmeCoefs[strOrig, iPValuePosition]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
522 dStd = frmeCoefs[strOrig,2]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
523
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
524 dCoef = adCoefs[iMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
525
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
526 #Setting adMetadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
527 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
528 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
529 if( is.na( strMetadata ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
530 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
531 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
532 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
533 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
534 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
535 adMetadata <- frmeData[,strMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
536
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
537 #Store (factor level modified) p-value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
538 #Store general results for each coef
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
539 adP <- c(adP, dP)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
540 lsSig[[length( lsSig ) + 1]] <- list(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
541 #Current metadata name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
542 name = strMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
543 #Current metadatda name (as a factor level if existing as such)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
544 orig = strOrig,#
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
545 #Taxon feature name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
546 taxon = strTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
547 #Taxon data / response
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
548 data = frmeData[,iTaxon],
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
549 #All levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
550 factors = c(strMetadata),
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
551 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
552 metadata = adMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
553 #Current coeficient value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
554 value = dCoef,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
555 #Standard deviation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
556 std = dStd,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
557 #Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
558 allCoefs = adCoefs)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
559 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
560 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
561 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
562 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
563 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
564 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
565
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
566
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
567 notfuncGetZeroInflatedResults <- function
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
568 ### Reduce the lm object return to just the data needed for further analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
569 ( llmod,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
570 ### The result from a linear model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
571 frmeData,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
572 ### Data analysis is performed on
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
573 liTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
574 ### The response id
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
575 dSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
576 ### Significance level for q-values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
577 adP,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
578 ### List of pvalues from all associations performed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
579 lsSig,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
580 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
581 strLog,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
582 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
583 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
584 ### Records of counts associated with quality control
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
585 lastrCols,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
586 ### Predictors used in the association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
587 asSuppressCovariates=c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
588 ### Vector of covariates to suppress and not give results for
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
589 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
590 ilmodIndex = 0
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
591 for(lmod in llmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
592 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
593 ilmodIndex = ilmodIndex + 1
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
594 lmod = llmod[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
595 iTaxon = liTaxon[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
596 astrCols = lastrCols[[ilmodIndex]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
597
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
598 #Exclude none and errors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
599 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
600 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
601 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
602 iPValuePosition = 4
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
603
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
604 #Get the column name of the iTaxon index
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
605 #frmeTmp needs to be what?
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
606 strTaxon = colnames( frmeData )[iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
607
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
608 #Write summary information to log file
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
609 funcWrite( "#model", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
610 funcWrite( lmod, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
611
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
612 #Get the coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
613 #This should work for linear models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
614 frmeCoefs <- summary(lmod)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
615 frmeCoefs = frmeCoefs$coefficients$count
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
616 funcWrite( "#Coefs", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
617 funcWrite( frmeCoefs, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
618
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
619 adCoefs = frmeCoefs[,1]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
620 names(adCoefs) = row.names(frmeCoefs)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
621
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
622 #Go through each coefficient
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
623 astrRows <- row.names( frmeCoefs )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
624
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
625 for( iMetadata in 1:length( astrRows ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
626 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
627 #Current coef which is being evaluated
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
628 strOrig = astrRows[iMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
629
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
630 #Skip y interscept
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
631 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
632 #Skip suppressed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
633 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
634
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
635 #Extract pvalue and std in standard model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
636 dP = frmeCoefs[strOrig, iPValuePosition]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
637 if(is.nan(dP)){ next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
638 dStd = frmeCoefs[strOrig,2]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
639 dCoef = adCoefs[iMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
640
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
641 #Setting adMetadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
642 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
643 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
644 if( is.na( strMetadata ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
645 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
646 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
647 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
648 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
649 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
650 adMetadata <- frmeData[,strMetadata]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
651
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
652 #Store (factor level modified) p-value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
653 #Store general results for each coef
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
654 adP <- c(adP, dP)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
655 lsSig[[length( lsSig ) + 1]] <- list(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
656 #Current metadata name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
657 name = strMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
658 #Current metadatda name (as a factor level if existing as such)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
659 orig = strOrig,#
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
660 #Taxon feature name
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
661 taxon = strTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
662 #Taxon data / response
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
663 data = frmeData[,iTaxon],
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
664 #All levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
665 factors = c(strMetadata),
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
666 #Metadata values
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
667 metadata = adMetadata,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
668 #Current coeficient value
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
669 value = dCoef,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
670 #Standard deviation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
671 std = dStd,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
672 #Model coefficients
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
673 allCoefs = adCoefs)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
674 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
675 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
676 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
677 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
678 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
679 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
680
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
681
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
682 ### Options for variable selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
683 # OK
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
684 funcBoostModel <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
685 ### Perform model selection / regularization with boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
686 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
687 ### The formula of the full model before boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
688 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
689 ### The data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
690 adCur,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
691 ### The response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
692 lsParameters,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
693 ### User controlled parameters needed specific to boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
694 lsForcedParameters = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
695 ### Force these predictors to be in the model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
696 strLog
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
697 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
698 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
699 funcWrite( c("#Boost formula", strFormula), strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
700 lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="laplace", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
701 #TODO# lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="gaussian", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
702
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
703 astrTerms <- c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
704 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
705 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
706 #Get boosting summary results
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
707 lsSum <- summary( lmod, plotit = FALSE )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
708
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
709 #Document
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
710 funcWrite( "#model-gbm", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
711 funcWrite( lmod$fit, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
712 funcWrite( lmod$train.error, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
713 funcWrite( lmod$Terms, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
714 funcWrite( "#summary-gbm", strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
715 funcWrite( lsSum, strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
716
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
717 # Uneven metadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
718 vstrUneven = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
719 # Kept metadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
720 vstrKeepMetadata = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
721
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
722 #Select model predictors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
723 #Check the frequency of selection and skip if not selected more than set threshold dFreq
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
724 for( strMetadata in lmod$var.names )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
725 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
726 #Get the name of the metadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
727 strTerm <- funcCoef2Col( strMetadata, frmeTmp, c(astrMetadata, astrGenetics) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
728
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
729 #Add back in uneven metadata
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
730 if(fAddBack)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
731 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
732 ldMetadata = frmeTmp[[strMetadata]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
733 if(length(which(table(ldMetadata)/length(ldMetadata)>dUnevenMax))>0)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
734 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
735 astrTerms <- c(astrTerms, strTerm)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
736 vstrUneven = c(vstrUneven,strMetadata)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
737 next
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
738 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
739 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
740
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
741 #If the selprob is less than a certain frequency, skip
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
742 dSel <- lsSum$rel.inf[which( lsSum$var == strMetadata )] / 100
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
743 if( is.na(dSel) || ( dSel <= lsParameters$dFreq ) ){ next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
744 #TODO# if( is.na(dSel) || ( dSel < lsParameters$dFreq ) ){ next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
745
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
746 #If you should ignore the metadata, continue
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
747 if( is.null( strTerm ) ) { next }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
748
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
749 #If you cant find the metadata name, write
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
750 if( is.na( strTerm ) )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
751 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
752 c_logrMaaslin$error( "Unknown coefficient: %s", strMetadata )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
753 next
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
754 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
755
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
756 #Collect metadata names
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
757 astrTerms <- c(astrTerms, strTerm)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
758 vstrKeepMetadata = c(vstrKeepMetadata,strTerm)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
759 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
760 } else { astrTerms = lsForcedParameters }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
761
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
762 # funcBoostInfluencePlot(vdRelInf=lsSum$rel.inf, sFeature=lsParameters$sBugName, vsPredictorNames=lsSum$var, vstrKeepMetadata=vstrKeepMetadata, vstrUneven=vstrUneven)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
763
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
764 return(unique(c(astrTerms,lsForcedParameters)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
765 ### Return a vector of predictor names to use in a reduced model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
766 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
767
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
768 #Glmnet default is to standardize the variables.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
769 #used as an example for implementation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
770 #http://r.789695.n4.nabble.com/estimating-survival-times-with-glmnet-and-coxph-td4614225.html
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
771 funcPenalizedModel <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
772 ### Perform penalized regularization for variable selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
773 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
774 ### The formula of the full model before boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
775 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
776 ### The data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
777 adCur,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
778 ### The response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
779 lsParameters,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
780 ### User controlled parameters needed specific to boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
781 lsForcedParameters = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
782 ### Force these predictors to be in the model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
783 strLog
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
784 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
785 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
786 #Convert the data frame to a model matrix
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
787 mtrxDesign = model.matrix(as.formula(strFormula), data=frmeTmp)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
788
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
789 #Cross validate the lambda
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
790 cvRet = cv.glmnet(x=mtrxDesign,y=adCur,alpha=lsParameters$dPAlpha)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
791
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
792 #Perform lasso
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
793 glmnetMod = glmnet(x=mtrxDesign,y=adCur,family=lsParameters$family,alpha=lsParameters$dPAlpha,lambda=cvRet$lambda.min)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
794
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
795 #Get non zero beta and return column names for covariate names.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
796 ldBeta = glmnetMod$beta[,which.max(glmnetMod$dev.ratio)]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
797 ldBeta = names(ldBeta[which(abs(ldBeta)>0)])
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
798 return(sapply(ldBeta,funcCoef2Col,frmeData=frmeTmp))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
799 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
800
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
801 # OK
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
802 funcForwardModel <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
803 ### Perform model selection with forward stepwise selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
804 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
805 ### lm style string defining reposne and predictors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
806 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
807 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
808 adCur,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
809 ### Response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
810 lsParameters,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
811 ### User controlled parameters needed specific to boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
812 lsForcedParameters = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
813 ### Force these predictors to be in the model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
814 strLog
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
815 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
816 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
817 funcWrite( c("#Forward formula", strFormula), strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
818
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
819 strNullFormula = "adCur ~ 1"
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
820 if(!is.null(lsForcedParameters))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
821 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
822 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
823 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
824 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
825 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
826 if(!("try-error" %in% c(class( lmodNull ),class( lmodFull ))))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
827 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
828 lmod = stepAIC(lmodNull, scope=list(lower=lmodNull,upper=lmodFull), direction="forward", trace=0)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
829 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
830 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
831 return( lsForcedParameters )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
832 ### Return a vector of predictor names to use in a reduced model or NA on error
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
833 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
834
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
835 # OK
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
836 # Select model with backwards selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
837 funcBackwardsModel <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
838 ### Perform model selection with backwards stepwise selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
839 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
840 ### lm style string defining reponse and predictors
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
841 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
842 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
843 adCur,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
844 ### Response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
845 lsParameters,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
846 ### User controlled parameters needed specific to boosting
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
847 lsForcedParameters = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
848 ### Force these predictors to be in the model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
849 strLog
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
850 ### File to which to document logging
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
851 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
852 funcWrite( c("#Backwards formula", strFormula), strLog )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
853
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
854 strNullFormula = "adCur ~ 1"
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
855 if(!is.null(lsForcedParameters))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
856 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
857 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
858 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
859
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
860 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
861 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
862
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
863 if(! class( lmodFull ) == "try-error" )
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
864 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
865 lmod = stepAIC(lmodFull, scope=list(lower=lmodNull, upper=lmodFull), direction="backward")
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
866 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
867 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
868 return( lsForcedParameters ) }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
869 ### Return a vector of predictor names to use in a reduced model or NA on error
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
870 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
871
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
872 ### Analysis methods
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
873 ### Univariate options
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
874
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
875 # Sparse Dir. Model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
876 #TODO# Implement in sfle
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
877
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
878 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
879 # Correlation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
880 # NOTE: Ignores the idea of random and fixed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
881 funcSpearman <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
882 ### Perform multiple univariate comparisons producing spearman correlations for association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
883 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
884 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
885 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
886 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
887 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
888 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
889 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
890 ### List recording anything important to QC
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
891 strRandomFormula = NULL
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
892 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
893 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
894 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
895 functionContrast=function(x,adCur,dfData)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
896 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
897 retList = list()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
898 ret = cor.test(as.formula(paste("~",x,"+ adCur")), data=dfData, method="spearman", na.action=c_strNA_Action)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
899 #Returning rho for the coef in a named vector
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
900 vdCoef = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
901 vdCoef[[x]]=ret$estimate
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
902 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
903 return(retList)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
904 }, lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
905 ### List of contrast information, pvalue, contrast and std per univariate test
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
906 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
907
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
908 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
909 # Wilcoxon (T-Test)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
910 # NOTE: Ignores the idea of random and fixed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
911 funcWilcoxon <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
912 ### Perform multiple univariate comparisons performing wilcoxon tests on discontinuous data with 2 levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
913 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
914 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
915 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
916 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
917 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
918 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
919 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
920 ### List recording anything important to QC
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
921 strRandomFormula = NULL
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
922 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
923 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
924 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
925 functionContrast=function(x,adCur,dfData)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
926 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
927 retList = list()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
928 ret = wilcox.test(as.formula(paste("adCur",x,sep=" ~ ")), data=dfData, na.action=c_strNA_Action)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
929 #Returning NA for the coef in a named vector
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
930 vdCoef = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
931 vdCoef[[x]]=ret$statistic
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
932 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
933 return(retList)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
934 }, lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
935 ### List of contrast information, pvalue, contrast and std per univariate test
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
936 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
937
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
938 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
939 # Kruskal.Wallis (Nonparameteric anova)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
940 # NOTE: Ignores the idea of random and fixed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
941 funcKruskalWallis <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
942 ### Perform multiple univariate comparisons performing Kruskal wallis rank sum tests on discontuous data with more than 2 levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
943 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
944 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
945 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
946 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
947 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
948 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
949 lsQCCounts,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
950 ### List recording anything important to QC
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
951 strRandomFormula = NULL
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
952 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
953 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
954 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
955 functionContrast=function(x,adCur,dfData)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
956 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
957 retList = list()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
958 lmodKW = kruskal(adCur,dfData[[x]],group=FALSE,p.adj="holm")
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
959
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
960 asLevels = levels(dfData[[x]])
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
961 # The names of the generated comparisons, sometimes the control is first sometimes it is not so
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
962 # We will just check which is in the names and use that
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
963 asComparisons = row.names(lmodKW$comparisons)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
964 #Get the comparison with the control
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
965 for(sLevel in asLevels[2:length(asLevels)])
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
966 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
967 sComparison = intersect(c(paste(asLevels[1],sLevel,sep=" - "),paste(sLevel,asLevels[1],sep=" - ")),asComparisons)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
968 #Returning NA for the coef in a named vector
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
969 vdCoef = c()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
970 vdCoef[[paste(x,sLevel,sep="")]]=lmodKW$comparisons[sComparison,"Difference"]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
971 # vdCoef[[paste(x,sLevel,sep="")]]=NA
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
972 retList[[length(retList)+1]]=list(p.value=lmodKW$comparisons[sComparison,"p.value"],SD=1.0,name=paste(x,sLevel,sep=""),coef=vdCoef)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
973 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
974 return(retList)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
975 }, lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
976 ### List of contrast information, pvalue, contrast and std per univariate test
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
977 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
978
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
979 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
980 # NOTE: Ignores the idea of random and fixed covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
981 funcDoUnivariate <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
982 ### Perform multiple univariate comparisons producing spearman correlations for association
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
983 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
984 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
985 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
986 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
987 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
988 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
989 lsHistory,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
990 ### List recording p-values, association information, and QC counts
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
991 strRandomFormula = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
992 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
993 fZeroInflate = FALSE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
994 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
995 if(fZeroInflate)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
996 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
997 throw("There are no zero-inflated univariate models to perform your analysis.")
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
998 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
999
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1000 # Get covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1001 astrCovariates = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1002
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1003 # For each covariate
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1004 for(sCovariate in astrCovariates)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1005 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1006 ## Check to see if it is discrete
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1007 axData = frmeTmp[[sCovariate]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1008 lsRet = NA
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1009 if(is.factor(axData) || is.logical(axData))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1010 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1011 ## If discrete check how many levels
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1012 lsDataLevels = levels(axData)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1013 ## If 2 levels do wilcoxon test
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1014 if(length(lsDataLevels) < 3)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1015 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1016 lsRet = funcWilcoxon(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1017 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1018 ## If 3 or more levels do kruskal wallis test
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1019 lsRet = funcKruskalWallis(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1020 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1021 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1022 ## If not discrete do spearman test (list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1023 lsRet = funcSpearman(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1024 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1025 lsHistory[["adP"]] = c(lsHistory[["adP"]], lsRet[["adP"]])
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1026 lsHistory[["lsSig"]] = c(lsHistory[["lsSig"]], lsRet[["lsSig"]])
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1027 lsHistory[["lsQCCounts"]] = lsRet[["lsQCCounts"]]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1028 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1029 return(lsHistory)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1030 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1031
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1032 ### Multivariate
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1033
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1034 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1035 funcLM <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1036 ### Perform vanilla linear regression
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1037 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1038 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1039 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1040 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1041 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1042 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1043 lsHistory,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1044 ### List recording p-values, association information, and QC counts
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1045 strRandomFormula = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1046 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1047 fZeroInflated = FALSE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1048 ### Turns on the zero inflated model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1049 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1050 adCur = frmeTmp[,iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1051 if(fZeroInflated)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1052 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1053 return(try(gamlss(formula=as.formula(strFormula), family=BEZI, data=frmeTmp))) # gamlss
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1054 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1055 if(!is.null(strRandomFormula))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1056 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1057 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=gaussian(link="identity"), data=frmeTmp)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1058 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1059 #return(try( lmer(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1060 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1061 return(try( lm(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1062 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1063 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1064 ### lmod result object from lm
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1065 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1066
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1067 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1068 funcBinomialMult <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1069 ### Perform linear regression with negative binomial link
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1070 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1071 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1072 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1073 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1074 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1075 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1076 lsHistory,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1077 ### List recording p-values, association information, and QC counts
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1078 strRandomFormula = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1079 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1080 fZeroInflated = FALSE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1081 ### Turns on the zero inflated model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1082 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1083 adCur = frmeTmp[,iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1084 if(fZeroInflated)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1085 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1086 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="negbin"))) # pscl
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1087 # return(try(gamlss(as.formula(strFormula), family=ZINBI, data=frmeTmp))) # pscl
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1088 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1089 if(!is.null(strRandomFormula))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1090 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1091 throw("This analysis flow is not completely developed, please choose an option other than negative bionomial with random covariates")
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1092 #TODO need to estimate the theta
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1093 #return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=negative.binomial(theta = 2, link=log), data=frmeTmp)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1094 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1095 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1096 return(try( glm.nb(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action )))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1097 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1098 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1099 ### lmod result object from lm
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1100 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1101
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1102 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1103 funcQuasiMult <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1104 ### Perform linear regression with quasi-poisson link
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1105 strFormula,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1106 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1107 frmeTmp,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1108 ### Data on which to perform analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1109 iTaxon,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1110 ### Index of the response data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1111 lsHistory,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1112 ### List recording p-values, association information, and QC counts
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1113 strRandomFormula = NULL,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1114 ### Has the formula for random covariates
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1115 fZeroInflated = FALSE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1116 ### Turns on a zero infalted model
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1117 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1118 adCur = frmeTmp[,iTaxon]
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1119 if(fZeroInflated)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1120 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1121 # return(try(gamlss(formula=as.formula(strFormula), family=ZIP, data=frmeTmp))) # gamlss
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1122 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="poisson"))) # pscl
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1123 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1124 #Check to see if | is in the model, if so use a lmm otherwise the standard glm is ok
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1125 if(!is.null(strRandomFormula))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1126 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1127 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family= quasipoisson, data=frmeTmp)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1128 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1129 #return(try ( glmer(as.formula(strFormula), data=frmeTmp, family=quasipoisson, na.action=c_strNA_Action) ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1130 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1131 return(try( glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action) ))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1132 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1133 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1134 ### lmod result object from lm
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1135 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1136
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1137 ### Transformations
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1138 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1139 funcArcsinSqrt <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1140 # Transform data with arcsin sqrt transformation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1141 aData
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1142 ### The data on which to perform the transformation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1143 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1144 return(asin(sqrt(aData)))
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1145 ### Transformed data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1146 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1147
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1148 funcSquareSin <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1149 # Transform data with square sin transformation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1150 # Opposite of the funcArcsinSqrt
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1151 aData
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1152 ### The data on which to perform the transformation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1153 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1154 return(sin(aData)^2)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1155 ### Transformed data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1156 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1157
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1158 # Tested
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1159 funcNoTransform <-function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1160 ### Pass data without transform
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1161 aData
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1162 ### The data on which to perform the transformation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1163 ### Only given here to preserve the pattern, not used.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1164 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1165 return(aData)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1166 ### Transformed data
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1167 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1168
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1169 funcGetAnalysisMethods <- function(
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1170 ### Returns the appropriate functions for regularization, analysis, data transformation, and analysis object inspection.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1171 ### This allows modular customization per analysis step.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1172 ### To add a new method insert an entry in the switch for either the selection, transform, or method
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1173 ### Insert them by using the pattern optparse_keyword_without_quotes = function_in_AnalysisModules
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1174 ### Order in the return listy is currently set and expected to be selection, transforms/links, analysis method
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1175 ### none returns null
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1176 sModelSelectionKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1177 ### Keyword defining the method of model selection
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1178 sTransformKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1179 ### Keyword defining the method of data transformation
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1180 sMethodKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1181 ### Keyword defining the method of analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1182 fZeroInflated = FALSE
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1183 ### Indicates if using zero inflated models
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1184 ){
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1185 lRetMethods = list()
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1186 #Insert selection methods here
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1187 lRetMethods[[c_iSelection]] = switch(sModelSelectionKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1188 boost = funcBoostModel,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1189 penalized = funcPenalizedModel,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1190 forward = funcForwardModel,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1191 backward = funcBackwardsModel,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1192 none = NA)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1193
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1194 #Insert transforms
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1195 lRetMethods[[c_iTransform]] = switch(sTransformKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1196 asinsqrt = funcArcsinSqrt,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1197 none = funcNoTransform)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1198
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1199 #Insert untransform
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1200 lRetMethods[[c_iUnTransform]] = switch(sTransformKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1201 asinsqrt = funcNoTransform,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1202 none = funcNoTransform)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1203
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1204 #Insert analysis
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1205 lRetMethods[[c_iAnalysis]] = switch(sMethodKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1206 neg_binomial = funcBinomialMult,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1207 quasi = funcQuasiMult,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1208 univariate = funcDoUnivariate,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1209 lm = funcLM,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1210 none = NA)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1211
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1212 # If a univariate method is used it is required to set this to true
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1213 # For correct handling.
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1214 lRetMethods[[c_iIsUnivariate]]=sMethodKey=="univariate"
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1215
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1216 #Insert method to get results
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1217 if(fZeroInflated)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1218 {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1219 lRetMethods[[c_iResults]] = switch(sMethodKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1220 neg_binomial = funcGetZeroInflatedResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1221 quasi = funcGetZeroInflatedResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1222 univariate = funcGetUnivariateResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1223 lm = funcGetZeroInflatedResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1224 none = NA)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1225 } else {
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1226 lRetMethods[[c_iResults]] = switch(sMethodKey,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1227 neg_binomial = funcGetLMResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1228 quasi = funcGetLMResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1229 univariate = funcGetUnivariateResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1230 lm = funcGetLMResults,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1231 none = NA)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1232 }
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1233
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1234 return(lRetMethods)
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1235 ### Returns a list of functions to be passed for regularization, data transformation, analysis,
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1236 ### and custom analysis results introspection functions to pull from return objects data of interest
ca61989bc3b4 Uploaded
george-weingart
parents:
diff changeset
1237 }