diff maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/SummarizeMaaslin.R @ 6:ca61989bc3b4

Uploaded
author george-weingart
date Sun, 08 Feb 2015 23:39:43 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/SummarizeMaaslin.R	Sun Feb 08 23:39:43 2015 -0500
@@ -0,0 +1,86 @@
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+
+inlinedocs <- function(
+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
+##description<< Creates a summary of association detail files.
+) { return( pArgs ) }
+
+#Logging class
+suppressMessages(library(logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
+
+# Get logger
+c_logrMaaslin	<- getLogger( "maaslin" )
+
+funcSummarizeDirectory = function(
+### Summarizes the massline detail files into one file based on significance.
+astrOutputDirectory,
+### The output directory to find the MaAsLin results.
+strBaseName,
+### The prefix string used in maaslin to start the detail files.
+astrSummaryFileName,
+### The summary file's name, should be a path not a file name
+astrKeyword,
+### The column name of the data to check significance before adding a detail to the summary
+afSignificanceLevel
+### The value of significance the data must be at or below to be included in the summary (0.0 is most significant; like p-values)
+){
+  #Store significant data elements
+  dfSignificantData = NULL
+
+  #Get detail files in output directory
+  astrlsDetailFiles = list.files(astrOutputDirectory, pattern=paste(strBaseName,"-","[[:print:]]*",c_sDetailFileSuffix,sep=""), full.names=TRUE)
+  logdebug(format(astrlsDetailFiles),c_logrMaaslin)
+
+  #For each file after the first file
+  for(astrFile in astrlsDetailFiles)
+  {
+    #Read in data and reduce to significance
+    dfDetails = read.table(astrFile, header=TRUE, sep=c_cTableDelimiter)
+    dfDetails = dfDetails[which(dfDetails[astrKeyword] <= afSignificanceLevel),]
+
+    #Combine with other data if it exists
+    if(is.null(dfSignificantData))
+    {
+      dfSignificantData = dfDetails
+    } else {
+      dfSignificantData = rbind(dfSignificantData,dfDetails)
+    }
+  }
+  
+  #Write data to file
+  unlink(astrSummaryFileName)
+  if(is.null(dfSignificantData))
+  {
+    funcWrite("No significant data found.",astrSummaryFileName)
+    return( NULL )
+  } else {
+    #Sort by metadata and then significance
+    dfSignificantData = dfSignificantData[order(dfSignificantData$Value, dfSignificantData$P.value, decreasing = FALSE),]
+    funcWriteTable( dfSignificantData, astrSummaryFileName, fAppend = FALSE )
+    # Sort by q.value and return
+    return( dfSignificantData[ order( dfSignificantData$P.value, decreasing = FALSE ), ] )
+  }
+}