annotate maaslin-4450aa4ecc84/src/lib/BoostGLM.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server
author george-weingart
date Sun, 08 Feb 2015 23:08:38 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
2 #Copyright (C) <2012>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
3 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
5 #this software and associated documentation files (the "Software"), to deal in the
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
6 #Software without restriction, including without limitation the rights to use, copy,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
8 #and to permit persons to whom the Software is furnished to do so, subject to
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
9 #the following conditions:
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
10 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
11 #The above copyright notice and this permission notice shall be included in all copies
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
12 #or substantial portions of the Software.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
13 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
20 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
22 # authored by the Huttenhower lab at the Harvard School of Public Health
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
24 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
25
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
26 inlinedocs <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
28 ##description<< Manages the quality control of data and the performance of analysis (univariate or multivariate), regularization, and data (response) transformation.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
29 ) { return( pArgs ) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
30
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
31 ### Load libraries quietly
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
32 suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
33 suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
34 suppressMessages(library( logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
35 suppressMessages(library( outliers, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
36 suppressMessages(library( robustbase, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
37 suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
38
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
39 ### Get constants
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
40 #source(file.path("input","maaslin","src","Constants.R"))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
41 #source("Constants.R")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
42
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
43 ## Get logger
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
44 c_logrMaaslin <- getLogger( "maaslin" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
45
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
46 funcDoGrubbs <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
47 ### Use the Grubbs Test to identify outliers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
48 iData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
49 ### Column index in the data frame to test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
50 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
51 ### The data frame holding the data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
52 dPOutlier,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
53 ### P-value threshold to indicate an outlier is significant
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
54 lsQC
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
55 ### List holding the QC info of the cleaning step. Which indices are outliers is added.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
56 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
57 adData <- frmeData[,iData]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
58
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
59 # Original number of NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
60 viNAOrig = which(is.na(adData))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
61
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
62 while( TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
63 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
64 lsTest <- try( grubbs.test( adData ), silent = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
65 if( ( class( lsTest ) == "try-error" ) || is.na( lsTest$p.value ) || ( lsTest$p.value > dPOutlier ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
66 {break}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
67 viOutliers = outlier( adData, logical = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
68 adData[viOutliers] <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
69 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
70
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
71 # Record removed data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
72 viNAAfter = which(is.na(adData))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
73
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
74 # If all were set to NA then ignore the filtering
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
75 if(length(adData)==length(viNAAfter))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
76 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
77 viNAAfter = viNAOrig
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
78 adData = frmeData[,iData]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
79 c_logrMaaslin$info( paste("Grubbs Test:: Identifed all data as outliers so was inactived for index=",iData," data=",paste(as.vector(frmeData[,iData]),collapse=","), "number zeros=", length(which(frmeData[,iData]==0)), sep = " " ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
80 } else if(mean(adData, na.rm=TRUE) == 0) {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
81 viNAAfter = viNAOrig
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
82 adData = frmeData[,iData]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
83 c_logrMaaslin$info( paste("Grubbs Test::Removed all values but 0, ignored. Index=",iData,".",sep=" " ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
84 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
85 # Document removal
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
86 if( sum( is.na( adData ) ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
87 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
88 c_logrMaaslin$info( "Grubbs Test::Removing %d outliers from %s", sum( is.na( adData ) ), colnames(frmeData)[iData] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
89 c_logrMaaslin$info( format( rownames( frmeData )[is.na( adData )] ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
90 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
91 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
92
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
93 return(list(data=adData,outliers=length(viNAAfter)-length(viNAOrig),indices=setdiff(viNAAfter,viNAOrig)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
94 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
95
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
96 funcDoFenceTest <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
97 ### Use a threshold based on the quartiles of the data to identify outliers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
98 iData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
99 ### Column index in the data frame to test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
100 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
101 ### The data frame holding the data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
102 dFence
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
103 ### The fence outside the first and third quartiles to use as a threshold for cutt off.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
104 ### This many times the interquartile range +/- to the 3rd/1st quartiles
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
105 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
106 # Establish fence
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
107 adData <- frmeData[,iData]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
108 adQ <- quantile( adData, c(0.25, 0.5, 0.75), na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
109
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
110 dIQR <- adQ[3] - adQ[1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
111 if(!dIQR)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
112 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
113 dIQR = sd(adData,na.rm = TRUE)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
114 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
115 dUF <- adQ[3] + ( dFence * dIQR )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
116 dLF <- adQ[1] - ( dFence * dIQR )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
117
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
118 # Record indices of values outside of fence to remove and remove.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
119 aiRemove <- c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
120 for( j in 1:length( adData ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
121 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
122 d <- adData[j]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
123 if( !is.na( d ) && ( ( d < dLF ) || ( d > dUF ) ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
124 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
125 aiRemove <- c(aiRemove, j)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
126 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
127 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
128
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
129 if(length(aiRemove)==length(adData))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
130 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
131 aiRemove = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
132 c_logrMaaslin$info( "OutliersByFence:: Identified all data as outlier so was inactivated for index=", iData,"data=", paste(as.vector(frmeData[,iData]),collapse=","), "number zeros=", length(which(frmeData[,iData]==0)), sep=" " )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
133 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
134 adData[aiRemove] <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
135
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
136 # Document to screen
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
137 if( length( aiRemove ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
138 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
139 c_logrMaaslin$info( "OutliersByFence::Removing %d outliers from %s", length( aiRemove ), colnames(frmeData)[iData] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
140 c_logrMaaslin$info( format( rownames( frmeData )[aiRemove] ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
141 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
142 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
143
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
144 return(list(data=adData,outliers=length(aiRemove),indices=aiRemove))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
145 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
146
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
147 funcZerosAreUneven = function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
148 ###
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
149 vdRawData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
150 ### Raw data to be checked during transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
151 funcTransform,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
152 ### Data transform to perform
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
153 vsStratificationFeatures,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
154 ### Groupings to check for unevenness
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
155 dfData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
156 ### Data frame holding the features
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
157 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
158 # Return indicator of unevenness
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
159 fUneven = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
160
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
161 # Transform the data to compare
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
162 vdTransformed = funcTransform( vdRawData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
163
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
164 # Go through each stratification of data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
165 for( sStratification in vsStratificationFeatures )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
166 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
167 # Current stratification
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
168 vFactorStrats = dfData[[ sStratification ]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
169
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
170 # If the metadata is not a factor then skip
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
171 # Only binned data can be evaluated this way.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
172 if( !is.factor( vFactorStrats )){ next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
173
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
174 viZerosCountsRaw = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
175 for( sLevel in levels( vFactorStrats ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
176 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
177 vdTest = vdRawData[ which( vFactorStrats == sLevel ) ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
178 viZerosCountsRaw = c( viZerosCountsRaw, length(which(vdTest == 0)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
179 vdTest = vdTransformed[ which( vFactorStrats == sLevel ) ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
180 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
181 dExpectation = 1 / length( viZerosCountsRaw )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
182 dMin = dExpectation / 2
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
183 dMax = dExpectation + dMin
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
184 viZerosCountsRaw = viZerosCountsRaw / sum( viZerosCountsRaw )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
185 if( ( length( which( viZerosCountsRaw <= dMin ) ) > 0 ) || ( length( which( viZerosCountsRaw >= dMax ) ) > 0 ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
186 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
187 return( TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
188 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
189 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
190 return( fUneven )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
191 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
192
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
193 funcTransformIncreasesOutliers = function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
194 ### Checks if a data transform increases outliers in a distribution
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
195 vdRawData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
196 ### Raw data to check for outlier zeros
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
197 funcTransform
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
198 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
199 iUnOutliers = length( boxplot( vdRawData, plot = FALSE )$out )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
200 iTransformedOutliers = length( boxplot( funcTransform( vdRawData ), plot = FALSE )$out )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
201
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
202 return( iUnOutliers <= iTransformedOutliers )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
203 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
204
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
205 funcClean <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
206 ### Properly clean / get data ready for analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
207 ### Includes custom analysis from the custom R script if it exists
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
208 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
209 ### Data frame, input data to be acted on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
210 funcDataProcess,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
211 ### Custom script that can be given to perform specialized processing before MaAsLin does.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
212 aiMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
213 ### Indices of columns in frmeData which are metadata for analysis.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
214 aiData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
215 ### Indices of column in frmeData which are (abundance) data for analysis.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
216 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
217 ### List that will hold the quality control information which is written in the output directory.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
218 astrNoImpute = c(),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
219 ### An array of column names of frmeData not to impute.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
220 dMinSamp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
221 ### Minimum number of samples
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
222 dMinAbd,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
223 # Minimum sample abundance
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
224 dFence,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
225 ### How many quartile ranges defines the fence to define outliers.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
226 funcTransform,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
227 ### The data transformation function or a dummy function that does not affect the data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
228 dPOutlier = 0.05
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
229 ### The significance threshold for the grubbs test to identify an outlier.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
230 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
231 # Call the custom script and set current data and indicies to the processes data and indicies.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
232 c_logrMaaslin$debug( "Start Clean")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
233 if( !is.null( funcDataProcess ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
234 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
235 c_logrMaaslin$debug("Additional preprocess function attempted.")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
236
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
237 pTmp <- funcDataProcess( frmeData=frmeData, aiMetadata=aiMetadata, aiData=aiData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
238 frmeData = pTmp$frmeData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
239 aiMetadata = pTmp$aiMetadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
240 aiData = pTmp$aiData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
241 lsQCCounts$lsQCCustom = pTmp$lsQCCounts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
242 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
243 # Set data indicies after custom QC process.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
244 lsQCCounts$aiAfterPreprocess = aiData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
245
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
246 # Remove missing data, remove any sample that has less than dMinSamp * the number of data or low abundance
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
247 aiRemove = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
248 aiRemoveLowAbundance = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
249 for( iCol in aiData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
250 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
251 adCol = frmeData[,iCol]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
252 adCol[!is.finite( adCol )] <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
253 if( ( sum( !is.na( adCol ) ) < ( dMinSamp * length( adCol ) ) ) ||
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
254 ( length( unique( na.omit( adCol ) ) ) < 2 ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
255 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
256 aiRemove = c(aiRemove, iCol)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
257 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
258 if( sum(adCol > dMinAbd, na.rm=TRUE ) < (dMinSamp * length( adCol)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
259 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
260 aiRemoveLowAbundance = c(aiRemoveLowAbundance, iCol)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
261 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
262 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
263 # Remove and document
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
264 aiData = setdiff( aiData, aiRemove )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
265 aiData = setdiff( aiData, aiRemoveLowAbundance )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
266 lsQCCounts$iMissingData = aiRemove
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
267 lsQCCounts$iLowAbundanceData = aiRemoveLowAbundance
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
268 if(length(aiRemove))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
269 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
270 c_logrMaaslin$info( "Removing the following for data lower bound.")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
271 c_logrMaaslin$info( format( colnames( frmeData )[aiRemove] ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
272 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
273 if(length(aiRemoveLowAbundance))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
274 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
275 c_logrMaaslin$info( "Removing the following for too many low abundance bugs.")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
276 c_logrMaaslin$info( format( colnames( frmeData )[aiRemoveLowAbundance] ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
277 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
278
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
279 #Transform data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
280 iTransformed = 0
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
281 viNotTransformedData = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
282 for(aiDatum in aiData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
283 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
284 adValues = frmeData[,aiDatum]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
285 # if( ! funcTransformIncreasesOutliers( adValues, funcTransform ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
286 # {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
287 frmeData[,aiDatum] = funcTransform( adValues )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
288 # iTransformed = iTransformed + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
289 # } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
290 # viNotTransformedData = c( viNotTransformedData, aiDatum )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
291 # }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
292 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
293 c_logrMaaslin$info(paste("Number of features transformed = ",iTransformed))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
294
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
295 # Metadata: Properly factorize all logical data and integer and number data with less than iNonFactorLevelThreshold
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
296 # Also record which are numeric metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
297 aiNumericMetadata = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
298 for( i in aiMetadata )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
299 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
300 if( ( class( frmeData[,i] ) %in% c("integer", "numeric", "logical") ) &&
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
301 ( length( unique( frmeData[,i] ) ) < c_iNonFactorLevelThreshold ) ) {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
302 c_logrMaaslin$debug(paste("Changing metadatum from numeric/integer/logical to factor",colnames(frmeData)[i],sep="="))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
303 frmeData[,i] = factor( frmeData[,i] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
304 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
305 if( class( frmeData[,i] ) %in% c("integer","numeric") )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
306 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
307 aiNumericMetadata = c(aiNumericMetadata,i)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
308 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
309 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
310
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
311 # Remove outliers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
312 # If the dFence Value is set use the method of defining the outllier as
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
313 # dFence * the interquartile range + or - the 3rd and first quartile respectively.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
314 # If not the gibbs test is used.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
315 lsQCCounts$aiDataSumOutlierPerDatum = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
316 lsQCCounts$aiMetadataSumOutlierPerDatum = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
317 lsQCCounts$liOutliers = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
318
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
319 if( dFence > 0.0 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
320 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
321 # For data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
322 for( iData in aiData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
323 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
324 lOutlierInfo <- funcDoFenceTest(iData=iData,frmeData=frmeData,dFence=dFence)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
325 frmeData[,iData] <- lOutlierInfo[["data"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
326 lsQCCounts$aiDataSumOutlierPerDatum <- c(lsQCCounts$aiDataSumOutlierPerDatum,lOutlierInfo[["outliers"]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
327 if(lOutlierInfo[["outliers"]]>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
328 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
329 lsQCCounts$liOutliers[[paste(iData,sep="")]] <- lOutlierInfo[["indices"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
330 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
331 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
332
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
333 # Remove outlier non-factor metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
334 for( iMetadata in aiNumericMetadata )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
335 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
336 lOutlierInfo <- funcDoFenceTest(iData=iMetadata,frmeData=frmeData,dFence=dFence)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
337 frmeData[,iMetadata] <- lOutlierInfo[["data"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
338 lsQCCounts$aiMetadataSumOutlierPerDatum <- c(lsQCCounts$aiMetadataSumOutlierPerDatum,lOutlierInfo[["outliers"]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
339 if(lOutlierInfo[["outliers"]]>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
340 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
341 lsQCCounts$liOutliers[[paste(iMetadata,sep="")]] <- lOutlierInfo[["indices"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
342 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
343 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
344 #Do not use the fence, use the Grubbs test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
345 } else if(dPOutlier!=0.0){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
346 # For data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
347 for( iData in aiData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
348 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
349 lOutlierInfo <- funcDoGrubbs(iData=iData,frmeData=frmeData,dPOutlier=dPOutlier)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
350 frmeData[,iData] <- lOutlierInfo[["data"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
351 lsQCCounts$aiDataSumOutlierPerDatum <- c(lsQCCounts$aiDataSumOutlierPerDatum,lOutlierInfo[["outliers"]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
352 if(lOutlierInfo[["outliers"]]>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
353 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
354 lsQCCounts$liOutliers[[paste(iData,sep="")]] <- lOutlierInfo[["indices"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
355 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
356 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
357 for( iMetadata in aiNumericMetadata )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
358 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
359 lOutlierInfo <- funcDoGrubbs(iData=iMetadata,frmeData=frmeData,dPOutlier=dPOutlier)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
360 frmeData[,iMetadata] <- lOutlierInfo[["data"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
361 lsQCCounts$aiMetadataSumOutlierPerDatum <- c(lsQCCounts$aiMetadataSumOutlierPerDatum,lOutlierInfo[["outliers"]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
362 if(lOutlierInfo[["outliers"]]>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
363 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
364 lsQCCounts$liOutliers[[paste(iMetadata,sep="")]] <- lOutlierInfo[["indices"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
365 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
366 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
367 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
368
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
369 # Metadata: Remove missing data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
370 # This is defined as if there is only one non-NA value or
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
371 # if the number of NOT NA data is less than a percentage of the data defined by dMinSamp
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
372 aiRemove = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
373 for( iCol in c(aiMetadata) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
374 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
375 adCol = frmeData[,iCol]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
376 if( ( sum( !is.na( adCol ) ) < ( dMinSamp * length( adCol ) ) ) ||
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
377 ( length( unique( na.omit( adCol ) ) ) < 2 ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
378 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
379 aiRemove = c(aiRemove, iCol)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
380 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
381 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
382
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
383 # Remove metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
384 aiMetadata = setdiff( aiMetadata, aiRemove )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
385
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
386 # Update the data which was removed.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
387 lsQCCounts$iMissingMetadata = aiRemove
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
388 if(length(aiRemove))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
389 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
390 c_logrMaaslin$info("Removing the following metadata for too much missing data or only one data value outside of NA.")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
391 c_logrMaaslin$info(format(colnames( frmeData )[aiRemove]))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
392 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
393
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
394 # Keep track of factor levels in a list for later use
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
395 lslsFactors <- list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
396 for( iCol in c(aiMetadata) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
397 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
398 aCol <- frmeData[,iCol]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
399 if( class( aCol ) == "factor" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
400 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
401 lslsFactors[[length( lslsFactors ) + 1]] <- list(iCol, levels( aCol ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
402 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
403 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
404
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
405 # Replace missing data values by the mean of the data column.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
406 # Remove samples that were all NA from the cleaning and so could not be imputed.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
407 aiRemoveData = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
408 for( iCol in aiData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
409 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
410 adCol <- frmeData[,iCol]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
411 adCol[is.infinite( adCol )] <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
412 adCol[is.na( adCol )] <- mean( adCol[which(adCol>0)], na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
413 frmeData[,iCol] <- adCol
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
414
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
415 if(length(which(is.na(frmeData[,iCol]))) == length(frmeData[,iCol]))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
416 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
417 c_logrMaaslin$info( paste("Removing data", iCol, "for being all NA after QC"))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
418 aiRemoveData = c(aiRemoveData,iCol)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
419 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
420 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
421
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
422 # Remove and document
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
423 aiData = setdiff( aiData, aiRemoveData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
424 lsQCCounts$iMissingData = c(lsQCCounts$iMissingData,aiRemoveData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
425 if(length(aiRemoveData))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
426 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
427 c_logrMaaslin$info( "Removing the following for having only NAs after cleaning (maybe due to only having NA after outlier testing).")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
428 c_logrMaaslin$info( format( colnames( frmeData )[aiRemoveData] ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
429 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
430
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
431 #Use na.gam.replace to manage NA metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
432 aiTmp <- setdiff( aiMetadata, which( colnames( frmeData ) %in% astrNoImpute ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
433 # Keep tack of NAs so the may not be plotted later.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
434 liNaIndices = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
435 lsNames = names(frmeData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
436 for( i in aiTmp)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
437 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
438 liNaIndices[[lsNames[i]]] = which(is.na(frmeData[,i]))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
439 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
440 frmeData[,aiTmp] <- na.gam.replace( frmeData[,aiTmp] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
441
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
442 #If NA is a value in factor data, set the NA as a level.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
443 for( lsFactor in lslsFactors )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
444 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
445 iCol <- lsFactor[[1]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
446 aCol <- frmeData[,iCol]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
447 if( "NA" %in% levels( aCol ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
448 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
449 if(! lsNames[iCol] %in% astrNoImpute)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
450 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
451 liNaIndices[[lsNames[iCol]]] = union(which(is.na(frmeData[,iCol])),which(frmeData[,iCol]=="NA"))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
452 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
453 frmeData[,iCol] <- factor( aCol, levels = c(lsFactor[[2]], "NA") )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
454 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
455 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
456
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
457 # Make sure there is a minimum number of non-0 measurements
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
458 aiRemove = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
459 for( iCol in aiData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
460 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
461 adCol = frmeData[,iCol]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
462 if(length( which(adCol!=0)) < ( dMinSamp * length( adCol ) ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
463 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
464 aiRemove = c(aiRemove, iCol)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
465 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
466 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
467
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
468 # Remove and document
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
469 aiData = setdiff( aiData, aiRemove)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
470 lsQCCounts$iZeroDominantData = aiRemove
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
471 if(length(aiRemove))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
472 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
473 c_logrMaaslin$info( "Removing the following for having not enough non-zero measurments for analysis.")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
474 c_logrMaaslin$info( format( colnames( frmeData )[aiRemove] ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
475 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
476
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
477 c_logrMaaslin$debug("End FuncClean")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
478 return( list(frmeData = frmeData, aiMetadata = aiMetadata, aiData = aiData, lsQCCounts = lsQCCounts, liNaIndices=liNaIndices, viNotTransformedData = viNotTransformedData) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
479 ### Return list of
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
480 ### frmeData: The Data after cleaning
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
481 ### aiMetadata: The indices of the metadata still being used after filtering
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
482 ### aiData: The indices of the data still being used after filtering
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
483 ### lsQCCOunts: QC info
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
484 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
485
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
486 funcBugs <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
487 ### Run analysis of all data features against all metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
488 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
489 ### Cleaned data including metadata, and data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
490 lsData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
491 ### This list is a general container for data as the analysis occurs, think about it as a cache for the analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
492 aiMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
493 ### Indices of metadata used in analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
494 aiData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
495 ### Indices of response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
496 aiNotTransformedData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
497 ### Indicies of the data not transformed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
498 strData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
499 ### Log file name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
500 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
501 ### Significance threshold for the qvalue cut off
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
502 fInvert=FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
503 ### Invert images to have a black background
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
504 strDirOut = NA,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
505 ### Output project directory
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
506 funcReg=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
507 ### Function for regularization
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
508 funcTransform=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
509 ### Function used to transform the data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
510 funcUnTransform=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
511 ### If a transform is used the opposite of that transfor must be used on the residuals in the partial residual plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
512 lsNonPenalizedPredictors=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
513 ### These predictors will not be penalized in the feature (model) selection step
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
514 funcAnalysis=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
515 ### Function to perform association analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
516 lsRandomCovariates=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
517 ### List of string names of metadata which will be treated as random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
518 funcGetResults=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
519 ### Function to unpack results from analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
520 fDoRPlot=TRUE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
521 ### Plot residuals
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
522 fOmitLogFile = FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
523 ### Stops the creation of the log file
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
524 fAllvAll=FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
525 ### Flag to turn on all against all comparisons
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
526 liNaIndices = list(),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
527 ### Indicies of imputed NA data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
528 lxParameters=list(),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
529 ### List holds parameters for different variable selection techniques
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
530 strTestingCorrection = "BH",
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
531 ### Correction for multiple testing
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
532 fIsUnivariate = FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
533 ### Indicates if the function is univariate
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
534 fZeroInflated = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
535 ### Indicates to use a zero infalted model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
536 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
537 c_logrMaaslin$debug("Start funcBugs")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
538 # If no output directory is indicated
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
539 # Then make it the current directory
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
540 if( is.na( strDirOut ) || is.null( strDirOut ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
541 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
542 if( !is.na( strData ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
543 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
544 strDirOut <- paste( dirname( strData ), "/", sep = "" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
545 } else { strDirOut = "" }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
546 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
547
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
548 # Make th log file and output file names based on the log file name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
549 strLog = NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
550 strBase = ""
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
551 if(!is.na(strData))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
552 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
553 strBaseOut <- paste( strDirOut, sub( "\\.([^.]+)$", "", basename(strData) ), sep = "/" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
554 strLog <- paste( strBaseOut,c_sLogFileSuffix, ".txt", sep = "" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
555 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
556
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
557 # If indicated, stop the creation of the log file
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
558 # Otherwise delete the log file if it exists and log
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
559 if(fOmitLogFile){ strLog = NA }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
560 if(!is.na(strLog))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
561 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
562 c_logrMaaslin$info( "Outputting to: %s", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
563 unlink( strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
564 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
565
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
566 # Will contain pvalues
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
567 adP = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
568 adPAdj = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
569
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
570 # List of lists with association information
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
571 lsSig <- list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
572 # Go through each data that was not previously removed and perform inference
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
573 for( iTaxon in aiData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
574 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
575 # Log to screen progress per 10 associations.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
576 # Can be thown off if iTaxon is missing a mod 10 value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
577 # So the taxons may not be logged every 10 but not a big deal
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
578 if( !( iTaxon %% 10 ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
579 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
580 c_logrMaaslin$info( "Taxon %d/%d", iTaxon, max( aiData ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
581 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
582
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
583 # Call analysis method
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
584 lsOne <- funcBugHybrid( iTaxon=iTaxon, frmeData=frmeData, lsData=lsData, aiMetadata=aiMetadata, dSig=dSig, adP=adP, lsSig=lsSig, funcTransform=funcTransform, funcUnTransform=funcUnTransform, strLog=strLog, funcReg=funcReg, lsNonPenalizedPredictors=lsNonPenalizedPredictors, funcAnalysis=funcAnalysis, lsRandomCovariates=lsRandomCovariates, funcGetResult=funcGetResults, fAllvAll=fAllvAll, fIsUnivariate=fIsUnivariate, lxParameters=lxParameters, fZeroInflated=fZeroInflated, fIsTransformed= ! iTaxon %in% aiNotTransformedData )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
585
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
586 # If you get a NA (happens when the lmm gets all random covariates) move on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
587 if( is.na( lsOne ) ){ next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
588
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
589 # The updating of the following happens in the inference method call in the funcBugHybrid call
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
590 # New pvalue array
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
591 adP <- lsOne$adP
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
592 # New lsSig contains data about significant feature v metadata comparisons
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
593 lsSig <- lsOne$lsSig
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
594 # New qc data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
595 lsData$lsQCCounts = lsOne$lsQCCounts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
596 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
597
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
598 # Log the QC info
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
599 c_logrMaaslin$debug("lsData$lsQCCounts")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
600 c_logrMaaslin$debug(format(lsData$lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
601
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
602 if( is.null( adP ) ) { return( NULL ) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
603
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
604 # Perform bonferonni corrections on factor data (for levels), calculate the number of tests performed, and FDR adjust for multiple hypotheses
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
605 # Perform Bonferonni adjustment on factor data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
606 for( iADIndex in 1:length( adP ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
607 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
608 # Only perform on factor data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
609 if( is.factor( lsSig[[ iADIndex ]]$metadata ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
610 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
611 adPAdj = c( adPAdj, funcBonferonniCorrectFactorData( dPvalue = adP[ iADIndex ], vsFactors = lsSig[[ iADIndex ]]$metadata, fIgnoreNAs = length(liNaIndices)>0) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
612 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
613 adPAdj = c( adPAdj, adP[ iADIndex ] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
614 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
615 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
616
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
617 iTests = funcCalculateTestCounts(iDataCount = length(aiData), asMetadata = intersect( lsData$astrMetadata, colnames( frmeData )[aiMetadata] ), asForced = lsNonPenalizedPredictors, asRandom = lsRandomCovariates, fAllvAll = fAllvAll)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
618
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
619 #Get indices of sorted data after the factor correction but before the multiple hypothesis corrections.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
620 aiSig <- sort.list( adPAdj )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
621
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
622 # Perform FDR BH
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
623 adQ = p.adjust(adPAdj, method=strTestingCorrection, n=max(length(adPAdj), iTests))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
624
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
625 # Find all covariates that had significant associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
626 astrNames <- c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
627 for( i in 1:length( lsSig ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
628 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
629 astrNames <- c(astrNames, lsSig[[i]]$name)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
630 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
631 astrNames <- unique( astrNames )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
632
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
633 # Sets up named label return for global plotting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
634 lsReturnTaxa <- list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
635 for( j in aiSig )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
636 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
637 if( adQ[j] > dSig ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
638 strTaxon <- lsSig[[j]]$taxon
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
639 if(strTaxon %in% names(lsReturnTaxa))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
640 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
641 lsReturnTaxa[[strTaxon]] = min(lsReturnTaxa[[strTaxon]],adQ[j])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
642 } else { lsReturnTaxa[[strTaxon]] = adQ[j]}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
643 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
644
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
645 # For each covariate with significant associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
646 # Write out a file with association information
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
647 for( strName in astrNames )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
648 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
649 strFileTXT <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
650 strFilePDF <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
651 for( j in aiSig )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
652 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
653 lsCur <- lsSig[[j]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
654 strCur <- lsCur$name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
655
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
656 if( strCur != strName ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
657
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
658 strTaxon <- lsCur$taxon
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
659 adData <- lsCur$data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
660 astrFactors <- lsCur$factors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
661 adCur <- lsCur$metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
662 adY <- adData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
663
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
664 if( is.na( strData ) ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
665
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
666 ## If the text file output is not written to yet
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
667 ## make the file names, and delete any previous file output
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
668 if( is.na( strFileTXT ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
669 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
670 strFileTXT <- sprintf( "%s-%s.txt", strBaseOut, strName )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
671 unlink(strFileTXT)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
672 funcWrite( c("Variable", "Feature", "Value", "Coefficient", "N", "N not 0", "P-value", "Q-value"), strFileTXT )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
673 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
674
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
675 ## Write text output
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
676 funcWrite( c(strName, strTaxon, lsCur$orig, lsCur$value, length( adData ), sum( adData > 0 ), adP[j], adQ[j]), strFileTXT )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
677
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
678 ## If the significance meets the threshold
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
679 ## Write PDF file output
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
680 if( adQ[j] > dSig ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
681
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
682 # Do not make residuals plots if univariate is selected
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
683 strFilePDF = funcPDF( frmeTmp=frmeData, lsCur=lsCur, curPValue=adP[j], curQValue=adQ[j], strFilePDF=strFilePDF, strBaseOut=strBaseOut, strName=strName, funcUnTransform= funcUnTransform, fDoResidualPlot=fDoRPlot, fInvert=fInvert, liNaIndices=liNaIndices )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
684 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
685 if( dev.cur( ) != 1 ) { dev.off( ) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
686 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
687 aiTmp <- aiData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
688
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
689 logdebug("End funcBugs", c_logMaaslin)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
690 return(list(lsReturnBugs=lsReturnTaxa, lsQCCounts=lsData$lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
691 ### List of data features successfully associated without error and quality control data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
692 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
693
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
694 #Lightly Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
695 ### Performs analysis for 1 feature
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
696 ### iTaxon: integer Taxon index to be associated with data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
697 ### frmeData: Data frame The full data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
698 ### lsData: List of all associated data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
699 ### aiMetadata: Numeric vector of indices
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
700 ### dSig: Numeric significance threshold for q-value cut off
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
701 ### adP: List of pvalues from associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
702 ### lsSig: List which serves as a cache of data about significant associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
703 ### strLog: String file to log to
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
704 funcBugHybrid <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
705 ### Performs analysis for 1 feature
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
706 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
707 ### integer Taxon index to be associated with data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
708 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
709 ### Data frame, the full data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
710 lsData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
711 ### List of all associated data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
712 aiMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
713 ### Numeric vector of indices
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
714 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
715 ### Numeric significance threshold for q-value cut off
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
716 adP,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
717 ### List of pvalues from associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
718 lsSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
719 ### List which serves as a cache of data about significant associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
720 funcTransform,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
721 ### The tranform used on the data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
722 funcUnTransform,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
723 ### The reverse transform on the data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
724 strLog = NA,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
725 ### String, file to which to log
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
726 funcReg=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
727 ### Function to perform regularization
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
728 lsNonPenalizedPredictors=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
729 ### These predictors will not be penalized in the feature (model) selection step
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
730 funcAnalysis=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
731 ### Function to perform association analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
732 lsRandomCovariates=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
733 ### List of string names of metadata which will be treated as random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
734 funcGetResult=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
735 ### Function to unpack results from analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
736 fAllvAll=FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
737 ### Flag to turn on all against all comparisons
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
738 fIsUnivariate = FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
739 ### Indicates the analysis function is univariate
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
740 lxParameters=list(),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
741 ### List holds parameters for different variable selection techniques
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
742 fZeroInflated = FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
743 ### Indicates if to use a zero infalted model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
744 fIsTransformed = TRUE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
745 ### Indicates that the bug is transformed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
746 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
747 #dTime00 <- proc.time()[3]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
748 #Get metadata column names
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
749 astrMetadata = intersect( lsData$astrMetadata, colnames( frmeData )[aiMetadata] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
750
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
751 #Get data measurements that are not NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
752 aiRows <- which( !is.na( frmeData[,iTaxon] ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
753
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
754 #Get the dataframe of non-na data measurements
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
755 frmeTmp <- frmeData[aiRows,]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
756
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
757 #Set the min boosting selection frequency to a default if not given
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
758 if( is.na( lxParameters$dFreq ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
759 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
760 lxParameters$dFreq <- 0.5 / length( c(astrMetadata) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
761 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
762
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
763 # Get the full data for the bug feature
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
764 adCur = frmeTmp[,iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
765 lxParameters$sBugName = names(frmeTmp[iTaxon])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
766
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
767 # This can run multiple models so some of the results are held in lists and some are not
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
768 llmod = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
769 liTaxon = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
770 lastrTerms = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
771
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
772 # Build formula for simple mixed effects models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
773 # Removes random covariates from variable selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
774 astrMetadata = setdiff(astrMetadata, lsRandomCovariates)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
775 strFormula <- paste( "adCur ~", paste( sprintf( "`%s`", astrMetadata ), collapse = " + " ), sep = " " )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
776
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
777 # Document the model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
778 funcWrite( c("#taxon", colnames( frmeTmp )[iTaxon]), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
779 funcWrite( c("#metadata", astrMetadata), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
780 funcWrite( c("#samples", rownames( frmeTmp )), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
781
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
782 #Model terms
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
783 astrTerms <- c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
784
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
785 # Attempt feature (model) selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
786 if(!is.na(funcReg))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
787 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
788 #Count model selection method attempts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
789 lsData$lsQCCounts$iBoosts = lsData$lsQCCounts$iBoosts + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
790 #Perform model selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
791 astrTerms <- funcReg(strFormula=strFormula, frmeTmp=frmeTmp, adCur=adCur, lsParameters=lxParameters, lsForcedParameters=lsNonPenalizedPredictors, strLog=strLog)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
792 #If the feature selection function is set to None, set all terms of the model to all the metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
793 } else { astrTerms = astrMetadata }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
794
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
795 # Get look through the boosting results to get a model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
796 # Holds the predictors in the predictors in the model that were selected by the boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
797 if(is.null( astrTerms )){lsData$lsQCCounts$iBoostErrors = lsData$lsQCCounts$iBoostErrors + 1}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
798
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
799 # Get the indices that are transformed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
800 # Of those indices check for uneven metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
801 # Untransform any of the metadata that failed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
802 # Failed means true for uneven occurences of zeros
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
803 # if( fIsTransformed )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
804 # {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
805 # vdUnevenZeroCheck = funcUnTransform( frmeData[[ iTaxon ]] )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
806 # if( funcZerosAreUneven( vdRawData=vdUnevenZeroCheck, funcTransform=funcTransform, vsStratificationFeatures=astrTerms, dfData=frmeData ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
807 # {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
808 # frmeData[[ iTaxon ]] = vdUnevenZeroCheck
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
809 # c_logrMaaslin$debug( paste( "Taxon transformation reversed due to unevenness of zero distribution.", iTaxon ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
810 # }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
811 # }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
812
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
813 # Run association analysis if predictors exist and an analysis function is specified
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
814 # Run analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
815 if(!is.na(funcAnalysis) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
816 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
817 #If there are selected and forced fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
818 if( length( astrTerms ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
819 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
820 #Count the association attempt
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
821 lsData$lsQCCounts$iLms = lsData$lsQCCounts$iLms + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
822
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
823 #Make the lm formula
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
824 #Build formula for simple mixed effects models using random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
825 strRandomCovariatesFormula = NULL
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
826 #Random covariates are forced
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
827 if(length(lsRandomCovariates)>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
828 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
829 #Format for lme
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
830 #Needed for changes to not allowing random covariates through the boosting process
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
831 strRandomCovariatesFormula <- paste( "adCur ~ ", paste( sprintf( "1|`%s`", lsRandomCovariates), collapse = " + " ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
832 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
833
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
834 #Set up a list of formula containing selected fixed variables changing and the forced fixed covariates constant
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
835 vstrFormula = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
836 #Set up suppressing forced covariates in a all v all scenario only
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
837 asSuppress = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
838 #Enable all against all comparisons
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
839 if(fAllvAll && !fIsUnivariate)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
840 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
841 lsVaryingCovariates = setdiff(astrTerms,lsNonPenalizedPredictors)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
842 lsConstantCovariates = setdiff(lsNonPenalizedPredictors,lsRandomCovariates)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
843 strConstantFormula = paste( sprintf( "`%s`", lsConstantCovariates ), collapse = " + " )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
844 asSuppress = lsConstantCovariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
845
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
846 if(length(lsVaryingCovariates)==0L)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
847 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
848 vstrFormula <- c( paste( "adCur ~ ", paste( sprintf( "`%s`", lsConstantCovariates ), collapse = " + " )) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
849 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
850 for( sVarCov in lsVaryingCovariates )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
851 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
852 strTempFormula = paste( "adCur ~ `", sVarCov,"`",sep="")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
853 if(length(lsConstantCovariates)>0){ strTempFormula = paste(strTempFormula,strConstantFormula,sep=" + ") }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
854 vstrFormula <- c( vstrFormula, strTempFormula )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
855 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
856 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
857 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
858 #This is either the multivariate case formula for all covariates in an lm or fixed covariates in the lmm
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
859 vstrFormula <- c( paste( "adCur ~ ", paste( sprintf( "`%s`", astrTerms ), collapse = " + " )) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
860 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
861
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
862 #Run the association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
863 for( strAnalysisFormula in vstrFormula )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
864 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
865 i = length(llmod)+1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
866 llmod[[i]] = funcAnalysis(strFormula=strAnalysisFormula, frmeTmp=frmeTmp, iTaxon=iTaxon, lsHistory=list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts), strRandomFormula=strRandomCovariatesFormula, fZeroInflated=fZeroInflated)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
867
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
868 liTaxon[[i]] = iTaxon
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
869 lastrTerms[[i]] = funcFormulaStrToList(strAnalysisFormula)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
870 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
871 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
872 #If there are no selected or forced fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
873 lsData$lsQCCounts$iNoTerms = lsData$lsQCCounts$iNoTerms + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
874 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
875 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
876 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
877
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
878 #Call funcBugResults and return it's return
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
879 if(!is.na(funcGetResult))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
880 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
881 #Format the results to a consistent expected result.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
882 return( funcGetResult( llmod=llmod, frmeData=frmeData, liTaxon=liTaxon, dSig=dSig, adP=adP, lsSig=lsSig, strLog=strLog, lsQCCounts=lsData$lsQCCounts, lastrCols=lastrTerms, asSuppressCovariates=asSuppress ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
883 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
884 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
885 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
886 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
887 }