view lib.r @ 13:1c30ff90f3ae draft

planemo upload commit 301d42e88026afdac618f4ec56fc6cbe19e3e419
author lecorguille
date Fri, 07 Apr 2017 07:42:18 -0400
parents 7da9252dd983
children a2c49996603e
line wrap: on
line source

# lib.r

#@author G. Le Corguille
#The function create a pdf from the different png generated by diffreport
diffreport_png2pdf <- function(filebase) {
    dir.create("pdf")

    pdfEicOutput = paste0("pdf/",filebase,"-eic_pdf.pdf")
    pdfBoxOutput = paste0("pdf/",filebase,"-box_pdf.pdf")

    system(paste0("gm convert ",filebase,"_eic/*.png ",pdfEicOutput))
    system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput))

}

#@author G. Le Corguille
#This function convert if it is required the Retention Time in minutes
RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
    if (convertRTMinute){
        #converting the retention times (seconds) into minutes
        print("converting the retention times into minutes in the variableMetadata")
        variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
        variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
        variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
    }
    return (variableMetadata)
}

#@author G. Le Corguille
#This function format ions identifiers
formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
    splitDeco = strsplit(as.character(variableMetadata$name),"_")
    idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
    namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
    variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
    return(variableMetadata)
}

#The function annotateDiffreport without the corr function which bugs
annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv") {
    # Resolve the bug with x11, with the function png
    options(bitmapType='cairo')

    #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
    res=try(is.null(xset@filled))

    # ------ annot -------
    listArguments[["calcCiS"]]=as.logical(listArguments[["calcCiS"]])
    listArguments[["calcIso"]]=as.logical(listArguments[["calcIso"]])
    listArguments[["calcCaS"]]=as.logical(listArguments[["calcCaS"]])

    # common parameters
    listArguments4annotate = list(object=xset,
    nSlaves=listArguments[["nSlaves"]],sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],
    maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],
    ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],
    polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])

    # quick == FALSE
    if(listArguments[["quick"]]==FALSE) {
        listArguments4annotate = append(listArguments4annotate,
            list(graphMethod=listArguments[["graphMethod"]],cor_eic_th=listArguments[["cor_eic_th"]],pval=listArguments[["pval"]],
            calcCiS=listArguments[["calcCiS"]],calcIso=listArguments[["calcIso"]],calcCaS=listArguments[["calcCaS"]]))
        # no ruleset
        if (!is.null(listArguments[["multiplier"]])) {
            listArguments4annotate = append(listArguments4annotate,
                list(multiplier=listArguments[["multiplier"]]))
        }
        # ruleset
        else {
            rulset=read.table(listArguments[["rules"]], h=T, sep=";")
            if (ncol(rulset) < 4) rulset=read.table(listArguments[["rules"]], h=T, sep="\t")
            if (ncol(rulset) < 4) rulset=read.table(listArguments[["rules"]], h=T, sep=",")
            if (ncol(rulset) < 4) {
                error_message="Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation"
                print(error_message)
                stop(error_message)
            }

            listArguments4annotate = append(listArguments4annotate,
                list(rules=rulset))
        }
    }


    # launch annotate
    xa = do.call("annotate", listArguments4annotate)
    peakList=getPeaklist(xa,intval=listArguments[["intval"]])
    peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");

    # --- dataMatrix ---
    dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c("name", make.names(sampnames(xa@xcmsSet))))]
    write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)


    # --- Multi condition : diffreport ---
    diffrepOri=NULL
    if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
        #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
        res=try(is.null(xset@filled))
        classes=levels(sampclass(xset))
        x=1:(length(classes)-1)
        for (i in seq(along=x) ) {
            y=1:(length(classes))
            for (n in seq(along=y)){
                if(i+n <= length(classes)){
                    filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
                    
                    diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]])
                    
                    diffrepOri = diffrep
                    
                    # renamming of the column rtmed to rt to fit with camera peaklist function output
                    colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt"
                    colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz"
                    
                    # combines results and reorder columns
                    diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
                    diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
                    
                    diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]])
                    diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
                    
                    if(listArguments[["sortpval"]]){
                        diffrep=diffrep[order(diffrep$pvalue), ]
                    }
                    
                    dir.create("tabular")
                    write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep=""))
                    
                    if (listArguments[["eicmax"]] != 0) {
                        diffreport_png2pdf(filebase)
                    }
                }
            }
        }
    }


    # --- variableMetadata ---
    variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
    variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]])
    variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
    # if we have 2 conditions, we keep stat of diffrep
    if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))==2) {
        variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F)
        if(exists("listArguments[[\"sortpval\"]]")){
            variableMetadata=variableMetadata[order(variableMetadata$pvalue), ]
        }
    }

    variableMetadataOri=variableMetadata
    write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)

    return(list("xa"=xa,"diffrep"=diffrepOri,"variableMetadata"=variableMetadataOri));

}


combinexsAnnos_function <- function(xaP, xaN, listOFlistArgumentsP,listOFlistArgumentsN, diffrepP=NULL,diffrepN=NULL,pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, convertRTMinute=F, numDigitsMZ=0, numDigitsRT=0, variableMetadataOutput="variableMetadata.tsv"){

    #Load the two Rdata to extract the xset objects from positive and negative mode
    cat("\tObject xset from positive mode\n")
    print(xaP)
    cat("\n")

    cat("\tObject xset from negative mode\n")
    print(xaN)
    cat("\n")

    cat("\n")
    cat("\tCombining...\n")
    #Convert the string to numeric for creating matrix
    row=as.numeric(strsplit(ruleset,",")[[1]][1])
    column=as.numeric(strsplit(ruleset,",")[[1]][2])
    ruleset=cbind(row,column)
    #Test if the file comes from an older version tool
    if ((!is.null(xaP)) & (!is.null(xaN))) {
        #Launch the combinexsannos function from CAMERA
        cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset)
    } else {
        stop("You must relauch the CAMERA.annotate step with the lastest version.")
    }

    if(pos){
        xa=xaP
        listOFlistArgumentsP=listOFlistArguments
        mode="neg. Mode"
    } else {
        xa=xaN
        listOFlistArgumentsN=listOFlistArguments
        mode="pos. Mode"
    }

    peakList=getPeaklist(xa)
    peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
    variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]);
    variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]

    #Test if there are more than two classes (conditions)
    if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) {
        diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="")
        diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="")

        variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name")
        variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name")
    }

    rownames(variableMetadata) = NULL
    #TODO: checker
    #colnames(variableMetadata)[1:2] = c("name","mz/rt");

    variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute)
    variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)

    #If the user want to keep only the metabolites which match a difference
    if(keep_meta){
        variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
    }

    #Write the output into a tsv file
    write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
    return(variableMetadata);

}

# This function get the raw file path from the arguments
getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) {
    if (!is.null(listArguments[["zipfile"]]))           zipfile = listArguments[["zipfile"]]
    if (!is.null(listArguments[["zipfilePositive"]]))   zipfile = listArguments[["zipfilePositive"]]
    if (!is.null(listArguments[["zipfileNegative"]]))   zipfile = listArguments[["zipfileNegative"]]

    if (!is.null(listArguments[["singlefile_galaxyPath"]])) {
        singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]];
        singlefile_sampleNames = listArguments[["singlefile_sampleName"]]
    }
    if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) {
        singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]];
        singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]]
    }
    if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) {
        singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]];
        singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]]
    }
    if (exists("singlefile_galaxyPaths")){
        singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,","))
        singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,","))

        singlefile=NULL
        for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
            singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i]
            singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i]
            singlefile[[singlefile_sampleName]] = singlefile_galaxyPath
        }
    }
    for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
        listArguments[[argument]]=NULL
    }
    return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments))
}


# This function retrieve the raw file in the working directory
#   - if zipfile: unzip the file with its directory tree
#   - if singlefiles: set symlink with the good filename
retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
    if(!is.null(singlefile) && (length("singlefile")>0)) {
        for (singlefile_sampleName in names(singlefile)) {
            singlefile_galaxyPath = singlefile[[singlefile_sampleName]]
            if(!file.exists(singlefile_galaxyPath)){
                error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
                print(error_message); stop(error_message)
            }

            file.symlink(singlefile_galaxyPath,singlefile_sampleName)
        }
        directory = "."

    }
    if(!is.null(zipfile) && (zipfile!="")) {
        if(!file.exists(zipfile)){
            error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
            print(error_message)
            stop(error_message)
        }

        #list all file in the zip file
        #zip_files=unzip(zipfile,list=T)[,"Name"]

        #unzip
        suppressWarnings(unzip(zipfile, unzip="unzip"))

        #get the directory name
        filesInZip=unzip(zipfile, list=T);
        directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
        directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
        directory = "."
        if (length(directories) == 1) directory = directories

        cat("files_root_directory\t",directory,"\n")

    }
    return (directory)
}