0
|
1 #####################################################################################
|
|
2 #Copyright (C) <2012>
|
|
3 #
|
|
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
5 #this software and associated documentation files (the "Software"), to deal in the
|
|
6 #Software without restriction, including without limitation the rights to use, copy,
|
|
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
8 #and to permit persons to whom the Software is furnished to do so, subject to
|
|
9 #the following conditions:
|
|
10 #
|
|
11 #The above copyright notice and this permission notice shall be included in all copies
|
|
12 #or substantial portions of the Software.
|
|
13 #
|
|
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
20 #
|
|
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
|
|
22 # authored by the Huttenhower lab at the Harvard School of Public Health
|
|
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
|
|
24 #####################################################################################
|
|
25
|
|
26 inlinedocs <- function(
|
|
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
|
|
28 ##description<< Collection of minor utility scripts
|
|
29 ) { return( pArgs ) }
|
|
30
|
|
31 #source("Constants.R")
|
|
32
|
|
33 funcRename <- function(
|
|
34 ### Modifies labels for plotting
|
|
35 ### If the name is not an otu collapse to the last two clades
|
|
36 ### Otherwise use the most terminal clade
|
|
37 astrNames
|
|
38 ### Names to modify for plotting
|
|
39 ){
|
|
40 astrRet <- c()
|
|
41 for( strName in astrNames )
|
|
42 {
|
|
43 astrName <- strsplit( strName, c_cFeatureDelimRex )[[1]]
|
|
44 i <- length( astrName )
|
|
45 if( ( astrName[i] == c_strUnclassified ) || !is.na( as.numeric( astrName[i] ) ) )
|
|
46 {
|
|
47 strRet <- paste( astrName[( i - 1 ):i], collapse = c_cFeatureDelim )
|
|
48 } else {
|
|
49 strRet <- astrName[i]
|
|
50 }
|
|
51 astrRet <- c(astrRet, strRet)
|
|
52 }
|
|
53 return( astrRet )
|
|
54 ### List of modified names
|
|
55 }
|
|
56
|
|
57 funcBonferonniCorrectFactorData <- function
|
|
58 ### Bonferroni correct for factor data
|
|
59 (dPvalue,
|
|
60 ### P-value to correct
|
|
61 vsFactors,
|
|
62 ### Factors of the data to correct
|
|
63 fIgnoreNAs = TRUE
|
|
64 ){
|
|
65 vsUniqueFactors = unique( vsFactors )
|
|
66 if( fIgnoreNAs ){ vsUniqueFactors = setdiff( vsUniqueFactors, c("NA","na","Na","nA") ) }
|
|
67 return( dPvalue * max( 1, ( length( vsUniqueFactors ) - 1 ) ) )
|
|
68 ### Numeric p-value that is correct for levels (excluding NA levels)
|
|
69 }
|
|
70
|
|
71 funcCalculateTestCounts <- function(
|
|
72 ### Calculates the number of tests used in inference
|
|
73 iDataCount,
|
|
74 asMetadata,
|
|
75 asForced,
|
|
76 asRandom,
|
|
77 fAllvAll
|
|
78 ){
|
|
79 iMetadata = length(asMetadata)
|
|
80 iForced = length(setdiff(intersect( asForced, asMetadata ), asRandom))
|
|
81 iRandom = length(intersect( asRandom, asMetadata ))
|
|
82 if(fAllvAll)
|
|
83 {
|
|
84 #AllvAll flow formula
|
|
85 return((iMetadata-iForced-iRandom) * iDataCount)
|
|
86 }
|
|
87
|
|
88 #Normal flow formula
|
|
89 return((iMetadata-iRandom) * iDataCount)
|
|
90 }
|
|
91
|
|
92 funcGetRandomColors=function(
|
|
93 #Generates a given number of random colors
|
|
94 tempNumberColors = 1
|
|
95 ### Number of colors to generate
|
|
96 ){
|
|
97 adRet = c()
|
|
98 return(sapply(1:tempNumberColors, function(x){
|
|
99 adRGB <- ( runif( 3 ) * 0.66 ) + 0.33
|
|
100 adRet <- c(adRet, rgb( adRGB[1], adRGB[2], adRGB[3] ))
|
|
101 }))
|
|
102 }
|
|
103
|
|
104 funcCoef2Col <- function(
|
|
105 ### Searches through a dataframe and looks for a column that would match the coefficient
|
|
106 ### by the name of the column or the column name and level appended together.
|
|
107 strCoef,
|
|
108 ### String coefficient name
|
|
109 frmeData,
|
|
110 ### Data frame of data
|
|
111 astrCols = c()
|
|
112 ### Column names of interest (if NULL is given, all column names are inspected).
|
|
113 ){
|
|
114 #If the coefficient is the intercept there is no data column to return so return null
|
|
115 if( strCoef %in% c("(Intercept)", "Intercept") ) { return( NULL ) }
|
|
116 #Remove ` from coefficient
|
|
117 strCoef <- gsub( "`", "", strCoef )
|
|
118
|
|
119 #If the coefficient name is not in the data frame
|
|
120 if( !( strCoef %in% colnames( frmeData ) ) )
|
|
121 {
|
|
122 fHit <- FALSE
|
|
123 #If the column names are not provided, use the column names of the dataframe.
|
|
124 if( is.null( astrCols ) ){astrCols <- colnames( frmeData )}
|
|
125
|
|
126 #Search through the different column names (factors)
|
|
127 for( strFactor in astrCols )
|
|
128 {
|
|
129 #Select a column, if it is not a factor or does not begin with the factor's name then skip
|
|
130 adCur <- frmeData[,strFactor]
|
|
131 if( ( class( adCur ) != "factor" ) ||
|
|
132 ( substr( strCoef, 1, nchar( strFactor ) ) != strFactor ) ) { next }
|
|
133
|
|
134 #For the factors, create factor-level name combinations to read in factors
|
|
135 #Then check to see the factor-level combination is the coefficient of interest
|
|
136 #If it is then store that factor as the coefficient of interest
|
|
137 #And break
|
|
138 for( strValue in levels( adCur ) )
|
|
139 {
|
|
140 strCur <- paste( strFactor, strValue, sep = c_sFactorNameSep )
|
|
141 if( strCur == strCoef )
|
|
142 {
|
|
143 strCoef <- strFactor
|
|
144 fHit <- TRUE
|
|
145 break
|
|
146 }
|
|
147 }
|
|
148
|
|
149 #If the factor was found, return
|
|
150 if( fHit ){break }
|
|
151 }
|
|
152 }
|
|
153
|
|
154 #If the original coefficient or the coefficient factor combination name are in the
|
|
155 #data frame, return the name. Otherwise return NA.
|
|
156 return( ifelse( ( strCoef %in% colnames( frmeData ) ), strCoef, NA ) )
|
|
157 ### Coefficient name
|
|
158 }
|
|
159
|
|
160 funcColToMFAValue = function(
|
|
161 ### Given a column name, return the MFA values that could be associated with the name
|
|
162 lsColNames,
|
|
163 ### String list of column names (as you would get from names(dataframe))
|
|
164 dfData
|
|
165 ### Data frame of data the column names refer to
|
|
166 ){
|
|
167 lsMFAValues = c()
|
|
168
|
|
169 for(sColName in lsColNames)
|
|
170 {
|
|
171 axCur = dfData[[sColName]]
|
|
172
|
|
173 if(is.logical(axCur)){axCur=as.factor(axCur)}
|
|
174 if(is.factor(axCur))
|
|
175 {
|
|
176 lsLevels = levels(axCur)
|
|
177 if((length(lsLevels)==2) && (!is.na(as.numeric(lsLevels[1]))) && (!is.na(as.numeric(lsLevels[2]))))
|
|
178 {
|
|
179 lsMFAValues = c(lsMFAValues,paste(sColName,lsLevels[1],sep=c_sMFANameSep1),paste(sColName,lsLevels[2],sep=c_sMFANameSep1))
|
|
180 }else{
|
|
181 for(sLevel in levels(axCur))
|
|
182 {
|
|
183 lsMFAValues = c(lsMFAValues,sLevel)
|
|
184 }
|
|
185 }
|
|
186 } else {
|
|
187 lsMFAValues = c(lsMFAValues,sColName)
|
|
188 }
|
|
189 }
|
|
190 return(setdiff(lsMFAValues,c("NA",NA)))
|
|
191 }
|
|
192
|
|
193 funcMFAValue2Col = function(
|
|
194 ### Given a value in a column, the column name is returned.
|
|
195 xValue,
|
|
196 dfData,
|
|
197 aiColumnIndicesToSearch = NULL
|
|
198 ){
|
|
199 lsColumnNames = names(dfData)
|
|
200
|
|
201 if(is.null(aiColumnIndicesToSearch))
|
|
202 {
|
|
203 aiColumnIndicesToSearch = c(1:dim(dfData)[2])
|
|
204 }
|
|
205
|
|
206 # Could be the column name
|
|
207 if(xValue %in% lsColumnNames){return(xValue)}
|
|
208
|
|
209 # Could be the column name and value
|
|
210 iValueLength = length(xValue)
|
|
211 for( iColIndex in c(1:length(lsColumnNames) ))
|
|
212 {
|
|
213 adCur = dfData[[lsColumnNames[iColIndex]]]
|
|
214 if(is.factor(adCur))
|
|
215 {
|
|
216 for(strValue in levels(adCur))
|
|
217 {
|
|
218 strCurVersion1 <- paste( lsColumnNames[iColIndex], strValue, sep = c_sMFANameSep1 )
|
|
219 strCurVersion2 <- paste( lsColumnNames[iColIndex], strValue, sep = c_sMFANameSep2 )
|
|
220 if((xValue == strCurVersion1) || (xValue == strCurVersion2)){return(lsColumnNames[iColIndex])}
|
|
221 }
|
|
222 }
|
|
223 }
|
|
224
|
|
225 # Could be the value
|
|
226 for(iColIndex in aiColumnIndicesToSearch)
|
|
227 {
|
|
228 if(xValue %in% dfData[[lsColumnNames[iColIndex]]]){return(lsColumnNames[iColIndex])}
|
|
229 }
|
|
230 return(NULL)
|
|
231 }
|
|
232
|
|
233 funcColorHelper <- function(
|
|
234 ### Makes sure the max is max and the min is min, and dmed is average
|
|
235 dMax = 1,
|
|
236 ### Max number
|
|
237 dMin = -1,
|
|
238 ### Min number
|
|
239 dMed = NA
|
|
240 ### Average value
|
|
241 ){
|
|
242 #Make sure max is max and min is min
|
|
243 vSort = sort(c(dMin,dMax))
|
|
244 return( list( dMin = vSort[1], dMax = vSort[2], dMed = ifelse((is.na(dMed)), (dMin+dMax)/2.0, dMed ) ))
|
|
245 ### List of min, max and med numbers
|
|
246 }
|
|
247
|
|
248 funcColor <- function(
|
|
249 ### Generate a color based on a number that is forced to be between a min and max range.
|
|
250 ### The color is based on how far the number is from the center of the given range
|
|
251 ### From red to green (high) are produced with default settings
|
|
252 dX,
|
|
253 ### Number from which to generate the color
|
|
254 dMax = 1,
|
|
255 ### Max possible value
|
|
256 dMin = -1,
|
|
257 ### Min possible value
|
|
258 dMed = NA,
|
|
259 ### Central value if you don't want to be the average
|
|
260 adMax = c(1, 1, 0),
|
|
261 ### Is used to generate the color for the higher values in the range, this can be changed to give different colors set to green
|
|
262 adMin = c(0, 0, 1),
|
|
263 ### Is used to generate the color for the lower values in the range, this can be changed to give different colors set to red
|
|
264 adMed = c(0, 0, 0)
|
|
265 ### Is used to generate the color for the central values in the range, this can be changed to give different colors set to black
|
|
266 ){
|
|
267 lsTmp <- funcColorHelper( dMax, dMin, dMed )
|
|
268 dMax <- lsTmp$dMax
|
|
269 dMin <- lsTmp$dMin
|
|
270 dMed <- lsTmp$dMed
|
|
271 if( is.na( dX ) )
|
|
272 {
|
|
273 dX <- dMed
|
|
274 }
|
|
275 if( dX > dMax )
|
|
276 {
|
|
277 dX <- dMax
|
|
278 } else if( dX < dMin )
|
|
279 {
|
|
280 dX <- dMin }
|
|
281 if( dX < dMed )
|
|
282 {
|
|
283 d <- ( dMed - dX ) / ( dMed - dMin )
|
|
284 adCur <- ( adMed * ( 1 - d ) ) + ( adMin * d )
|
|
285 } else {
|
|
286 d <- ( dMax - dX ) / ( dMax - dMed )
|
|
287 adCur <- ( adMed * d ) + ( adMax * ( 1 - d ) )
|
|
288 }
|
|
289 return( rgb( adCur[1], adCur[2], adCur[3] ) )
|
|
290 ### RGB object
|
|
291 }
|
|
292
|
|
293 funcColors <- function(
|
|
294 ### Generate a range of colors
|
|
295 dMax = 1,
|
|
296 ### Max possible value
|
|
297 dMin = -1,
|
|
298 ### Min possible value
|
|
299 dMed = NA,
|
|
300 ### Central value if you don't want to be the average
|
|
301 adMax = c(1, 1, 0),
|
|
302 ### Is used to generate the color for the higher values in the range, this can be changed to give different colors set to green
|
|
303 adMin = c(0, 0, 1),
|
|
304 ### Is used to generate the color for the lower values in the range, this can be changed to give different colors set to red
|
|
305 adMed = c(0, 0, 0),
|
|
306 ### Is used to generate the color for the central values in the range, this can be changed to give different colors set to black
|
|
307 iSteps = 64
|
|
308 ### Number of intermediary colors made in the range of colors
|
|
309 ){
|
|
310 lsTmp <- funcColorHelper( dMax, dMin, dMed )
|
|
311 dMax <- lsTmp$dMax
|
|
312 dMin <- lsTmp$dMin
|
|
313 dMed <- lsTmp$dMed
|
|
314 aRet <- c ()
|
|
315 for( dCur in seq( dMin, dMax, ( dMax - dMin ) / ( iSteps - 1 ) ) )
|
|
316 {
|
|
317 aRet <- c(aRet, funcColor( dCur, dMax, dMin, dMed, adMax, adMin, adMed ))
|
|
318 }
|
|
319 return( aRet )
|
|
320 ### List of colors
|
|
321 }
|
|
322
|
|
323 funcGetColor <- function(
|
|
324 ### Get a color based on col parameter
|
|
325 ) {
|
|
326 adCol <- col2rgb( par( "col" ) )
|
|
327 return( sprintf( "#%02X%02X%02X", adCol[1], adCol[2], adCol[3] ) )
|
|
328 ### Return hexadecimal color
|
|
329 }
|
|
330
|
|
331 funcTrim=function(
|
|
332 ### Remove whitespace at the beginning or the end of a string
|
|
333 tempString
|
|
334 ### tempString String to be trimmed.
|
|
335 ){
|
|
336 return(gsub("^\\s+|\\s+$","",tempString))
|
|
337 }
|
|
338
|
|
339 funcWrite <- function(
|
|
340 ### Write a string or a table of data
|
|
341 ### This transposes a table before it is written
|
|
342 pOut,
|
|
343 ### String or table to write
|
|
344 strFile
|
|
345 ### File to which to write
|
|
346 ){
|
|
347 if(!is.na(strFile))
|
|
348 {
|
|
349 if( length( intersect( class( pOut ), c("character", "numeric") ) ) )
|
|
350 {
|
|
351 write.table( t(pOut), strFile, quote = FALSE, sep = c_cTableDelimiter, col.names = FALSE, row.names = FALSE, na = "", append = TRUE )
|
|
352 } else {
|
|
353 capture.output( print( pOut ), file = strFile, append = TRUE )
|
|
354 }
|
|
355 }
|
|
356 }
|
|
357
|
|
358 funcWriteTable <- function(
|
|
359 ### Log a table to a file
|
|
360 frmeTable,
|
|
361 ### Table to write
|
|
362 strFile,
|
|
363 ### File to which to write
|
|
364 fAppend = FALSE
|
|
365 ### Append when writing
|
|
366 ){
|
|
367 if(!is.na(strFile))
|
|
368 {
|
|
369 write.table( frmeTable, strFile, quote = FALSE, sep = c_cTableDelimiter, na = "", col.names = NA, append = fAppend )
|
|
370 }
|
|
371 }
|
|
372
|
|
373 funcWriteQCReport <- function(
|
|
374 ### Write out the quality control report
|
|
375 strProcessFileName,
|
|
376 ### File name
|
|
377 lsQCData,
|
|
378 ### List of QC data generated by maaslin to be written
|
|
379 liDataDim,
|
|
380 ### Dimensions of the data matrix
|
|
381 liMetadataDim
|
|
382 ### Dimensions of the metadata matrix
|
|
383 ){
|
|
384 unlink(strProcessFileName)
|
|
385 funcWrite( paste("Initial Metadata Matrix Size: Rows ",liMetadataDim[1]," Columns ",liMetadataDim[2],sep=""), strProcessFileName )
|
|
386 funcWrite( paste("Initial Data Matrix Size: Rows ",liDataDim[1]," Columns ",liDataDim[2],sep=""), strProcessFileName )
|
|
387 funcWrite( paste("\nInitial Data Count: ",length(lsQCData$aiDataInitial),sep=""), strProcessFileName )
|
|
388 funcWrite( paste("Initial Metadata Count: ",length(lsQCData$aiMetadataInitial),sep=""), strProcessFileName )
|
|
389 funcWrite( paste("Data Count after preprocess: ",length(lsQCData$aiAfterPreprocess),sep=""), strProcessFileName )
|
|
390 funcWrite( paste("Removed for missing metadata: ",length(lsQCData$iMissingMetadata),sep=""), strProcessFileName )
|
|
391 funcWrite( paste("Removed for missing data: ",length(lsQCData$iMissingData),sep=""), strProcessFileName )
|
|
392 funcWrite( paste("Number of data with outliers: ",length(which(lsQCData$aiDataSumOutlierPerDatum>0)),sep=""), strProcessFileName )
|
|
393 funcWrite( paste("Number of metadata with outliers: ",length(which(lsQCData$aiMetadataSumOutlierPerDatum>0)),sep=""), strProcessFileName )
|
|
394 funcWrite( paste("Metadata count which survived clean: ",length(lsQCData$aiMetadataCleaned),sep=""), strProcessFileName )
|
|
395 funcWrite( paste("Data count which survived clean: ",length(lsQCData$aiDataCleaned),sep=""), strProcessFileName )
|
|
396 funcWrite( paste("\nBoostings: ",lsQCData$iBoosts,sep=""), strProcessFileName )
|
|
397 funcWrite( paste("Boosting Errors: ",lsQCData$iBoostErrors,sep=""), strProcessFileName )
|
|
398 funcWrite( paste("LMs with no terms suriving boosting: ",lsQCData$iNoTerms,sep=""), strProcessFileName )
|
|
399 funcWrite( paste("LMs performed: ",lsQCData$iLms,sep=""), strProcessFileName )
|
|
400 if(!is.null(lsQCData$lsQCCustom))
|
|
401 {
|
|
402 funcWrite("Custom preprocess QC data: ", strProcessFileName )
|
|
403 funcWrite(lsQCData$lsQCCustom, strProcessFileName )
|
|
404 } else {
|
|
405 funcWrite("No custom preprocess QC data.", strProcessFileName )
|
|
406 }
|
|
407 funcWrite( "\n#Details###########################", strProcessFileName )
|
|
408 funcWrite("\nInitial Data Count: ", strProcessFileName )
|
|
409 funcWrite(lsQCData$aiDataInitial, strProcessFileName )
|
|
410 funcWrite("\nInitial Metadata Count: ", strProcessFileName )
|
|
411 funcWrite(lsQCData$aiMetadataInitial, strProcessFileName )
|
|
412 funcWrite("\nData Count after preprocess: ", strProcessFileName )
|
|
413 funcWrite(lsQCData$aiAfterPreprocess, strProcessFileName )
|
|
414 funcWrite("\nRemoved for missing metadata: ", strProcessFileName )
|
|
415 funcWrite(lsQCData$iMissingMetadata, strProcessFileName )
|
|
416 funcWrite("\nRemoved for missing data: ", strProcessFileName )
|
|
417 funcWrite(lsQCData$iMissingData, strProcessFileName )
|
|
418 funcWrite("\nDetailed outlier indices: ", strProcessFileName )
|
|
419 for(sFeature in names(lsQCData$liOutliers))
|
|
420 {
|
|
421 funcWrite(paste("Feature",sFeature,"Outlier indice(s):", paste(lsQCData$liOutliers[[sFeature]],collapse=",")), strProcessFileName )
|
|
422 }
|
|
423 funcWrite("\nMetadata which survived clean: ", strProcessFileName )
|
|
424 funcWrite(lsQCData$aiMetadataCleaned, strProcessFileName )
|
|
425 funcWrite("\nData which survived clean: ", strProcessFileName )
|
|
426 funcWrite(lsQCData$aiDataCleaned, strProcessFileName )
|
|
427 }
|
|
428
|
|
429 funcLMToNoNAFormula <-function(
|
|
430 lMod,
|
|
431 frmeTmp,
|
|
432 adCur
|
|
433 ){
|
|
434 dfCoef = coef(lMod)
|
|
435 astrCoefNames = setdiff(names(dfCoef[as.vector(!is.na(dfCoef))==TRUE]),"(Intercept)")
|
|
436 astrPredictors = unique(as.vector(sapply(astrCoefNames,funcCoef2Col, frmeData=frmeTmp)))
|
|
437 strFormula = paste( "adCur ~", paste( sprintf( "`%s`", astrPredictors ), collapse = " + " ), sep = " " )
|
|
438 return(try( lm(as.formula( strFormula ), data=frmeTmp )))
|
|
439 }
|
|
440
|
|
441 funcFormulaStrToList <- function(
|
|
442 #Takes a lm or mixed model formula and returns a list of covariate names in the formula
|
|
443 strFormula
|
|
444 #Formula to extract covariates from
|
|
445 ){
|
|
446 #Return list
|
|
447 lsRetComparisons = c()
|
|
448
|
|
449 #If you get a null or na just return
|
|
450 if(is.null(strFormula)||is.na(strFormula)){return(lsRetComparisons)}
|
|
451
|
|
452 #Get test comparisons (predictor names from formula string)
|
|
453 asComparisons = gsub("`","",setdiff(unlist(strsplit(unlist(strsplit(strFormula,"~"))[2]," ")),c("","+")))
|
|
454
|
|
455 #Change metadata in formula to univariate comparisons
|
|
456 for(sComparison in asComparisons)
|
|
457 {
|
|
458 #Removed random covariate formating
|
|
459 lsParse = unlist(strsplit(sComparison, "[\\(\\|\\)]", perl=FALSE))
|
|
460 lsRetComparisons = c(lsRetComparisons,lsParse[length(lsParse)])
|
|
461 }
|
|
462 return(lsRetComparisons)
|
|
463 }
|
|
464
|
|
465 funcFormulaListToString <- function(
|
|
466 # Using covariate and random covariate names, creates a lm or mixed model formula
|
|
467 # returns a vector of c(strLM, strMixedModel), one will be NA given the existance of random covariates.
|
|
468 # On error c(NA,NA) is given
|
|
469 astrTerms,
|
|
470 #Fixed covariates or all covariates if using an lm
|
|
471 astrRandomCovariates = NULL
|
|
472 #Random covariates for a mixed model
|
|
473 ){
|
|
474 strRetLMFormula = NA
|
|
475 strRetMMFormula = NA
|
|
476
|
|
477 #If no covariates return NA
|
|
478 if(is.null(astrTerms)){return(c(strRetLMFormula, strRetMMFormula))}
|
|
479
|
|
480 #Get fixed covariates
|
|
481 astrFixedCovariates = setdiff(astrTerms,astrRandomCovariates)
|
|
482
|
|
483 #If no fixed coavariates return NA
|
|
484 # Can not run a model with no fixed covariate, restriction of lmm
|
|
485 if(length(astrFixedCovariates)==0){return(c(strRetLMFormula, strRetMMFormula))}
|
|
486
|
|
487 # Fixed Covariates
|
|
488 strFixedCovariates = paste( sprintf( "`%s`", astrFixedCovariates ), collapse = " + " )
|
|
489
|
|
490 #If random covariates, set up a formula for mixed models
|
|
491 if(length(astrRandomCovariates)>0)
|
|
492 {
|
|
493 #Format for lmer
|
|
494 #strRetFormula <- paste( "adCur ~ ", paste( sprintf( "(1|`%s`))", intersect(astrRandomCovariates, astrTerms)), collapse = " + " ))
|
|
495 #Format for glmmpql
|
|
496 strRandomCovariates = paste( sprintf( "1|`%s`", setdiff(astrRandomCovariates, astrTerms)), collapse = " + " )
|
|
497 strRetMMFormula <- paste( "adCur ~ ", strFixedCovariates, " + ", strRandomCovariates, sep="")
|
|
498 } else {
|
|
499 #This is either the formula for all covariates in an lm or fixed covariates in the lmm
|
|
500 strRetLMFormula <- paste( "adCur ~ ", strFixedCovariates, sep="")
|
|
501 }
|
|
502 return(c(strRetLMFormula, strRetMMFormula))
|
|
503 } |