Mercurial > repos > sblanck > mpagenomics
diff segmentation.R @ 0:4d539083cf7f draft
planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233-dirty
author | sblanck |
---|---|
date | Tue, 12 May 2020 10:40:36 -0400 |
parents | |
children | 3fcbb8030fcc |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/segmentation.R Tue May 12 10:40:36 2020 -0400 @@ -0,0 +1,139 @@ +#!/usr/bin/env Rscript +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +library("optparse") + +##### Read options +option_list=list( + make_option("--input",type="character",default=NULL, dest="input"), + make_option("--output",type="character",default=NULL, dest="output"), + make_option("--new_file_path",type="character",default=NULL, dest="new_file_path"), + make_option("--nbcall",type="character",default=NULL, dest="nbcall"), + make_option("--outputgraph",type="character",default=NULL, dest="outputgraph"), + make_option("--graph",type="character",default=NULL, dest="graph"), + make_option("--signalType",type="character",default=NULL, dest="signalType"), + make_option("--cellularity",type="character",default=NULL, dest="cellularity"), + make_option("--outputlog",type="character",default=NULL, dest="outputlog"), + make_option("--log",type="character",default=NULL, dest="log"), + make_option("--user_id",type="character",default=NULL, dest="userid"), + make_option("--method",type="character",default=NULL, dest="method") +); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +if(is.null(opt$input)){ + print_help(opt_parser) + stop("input required.", call.=FALSE) +} + +#loading libraries + +input=opt$input +output=opt$output +tmp_dir=opt$new_file_path +nbcall=as.numeric(opt$nbcall) +outputgraph=type.convert(opt$outputgraph) +cellularity=as.numeric(opt$cellularity) +userId=opt$userid +method=opt$method +log=opt$log +outputlog=opt$outputlog +graph=opt$graph +signalType=opt$signalType + +#args<-commandArgs(TRUE) +# +#input=args[1] +#tmp_dir=args[2] +#nbcall=as.numeric(args[3]) +#cellularity=as.numeric(args[4]) +#output=args[5] +#method=args[6] +#userId=args[7] +#signalType=args[8] + +library(MPAgenomics) +workdir=file.path(tmp_dir,"mpagenomics",userId) +setwd(workdir) + +if (outputlog){ + sinklog <- file(log, open = "wt") + sink(sinklog ,type = "output") + sink(sinklog, type = "message") +} + +CN=read.table(input,header=TRUE) +uniqueChr=unique(CN$chromosome) +drops=c("chromosome","position","probeName") +CNsignal=CN[,!(names(CN)%in% drops),drop=FALSE] + +samples=names(CNsignal) + +if (signalType=="CN") +{ + +result=data.frame(sampleNames=character(0),chrom=character(0),chromStart=numeric(0),chromEnd=numeric(0),probes=numeric(0),means=numeric(0),calls=character(0),stringsAsFactors=FALSE) + +for (chr in uniqueChr) +{ +currentSubset=subset(CN, chromosome==chr) +currentPositions=currentSubset["position"] +for (sample in samples) + { + currentSignal=currentSubset[sample] + if (length(which(!is.na(unlist(currentSignal))))>1) + { + currentSeg=segmentation(signal=unlist(currentSignal),position=unlist(currentPositions),method=method) + callobj= callingObject(copynumber=currentSeg$signal, segmented=currentSeg$segmented,chromosome=rep(chr,length(currentSeg$signal)), position=currentSeg$startPos,sampleNames=sample) + currentCall=callingProcess(callobj,nclass=nbcall,cellularity=cellularity,verbose=TRUE) + currentResult=currentCall$segment + currentResult["sampleNames"]=c(rep(sample,length(currentCall$segment$chrom))) + result=rbind(result,currentResult) + } + } +} +finalResult=data.frame(sampleNames=result["sampleNames"],chrom=result["chrom"],chromStart=result["chromStart"],chromEnd=result["chromEnd"],probes=result["probes"],means=result["means"],calls=result["calls"],stringsAsFactors=FALSE) + +write.table(finalResult,output,row.names = FALSE, quote=FALSE, sep = "\t") +} else { + result=data.frame(sampleNames=character(0),chrom=character(0),start=numeric(0),end=numeric(0),points=numeric(0),means=numeric(0),stringsAsFactors=FALSE) + + for (chr in uniqueChr) + { + cat(paste0("chromosome ",chr,"\n")) + currentSubset=subset(CN, chromosome==chr) + currentPositions=currentSubset["position"] + for (sample in samples) + { + cat(paste0(" sample ",sample,"...")) + currentSignal=currentSubset[sample] + if (length(which(!is.na(unlist(currentSignal))))>1) + { + currentSeg=segmentation(signal=unlist(currentSignal),position=unlist(currentPositions),method=method) + currentResult=currentSeg$segment + currentResult["chrom"]=c(rep(chr,length(currentSeg$segment$means))) + currentResult["sampleNames"]=c(rep(sample,length(currentSeg$segment$means))) + result=rbind(result,currentResult) + + } + cat(paste0("OK\n")) + } + } + finalResult=data.frame(sampleNames=result["sampleNames"],chrom=result["chrom"],chromStart=result["start"],chromEnd=result["end"],probes=result["points"],means=result["means"],stringsAsFactors=FALSE) + write.table(finalResult,output,row.names = FALSE, quote=FALSE, sep = "\t") +} + +if (outputgraph){ + file.rename(file.path(tmp_dir,"mpagenomics",userId,"Rplots.pdf"), graph) +} + +if (outputlog){ + sink(type="output") + sink(type="message") + close(sinklog) +}