view segmentation.R @ 4:3fcbb8030fcc draft

"planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 40eda5ea3551e8b3bae32d0a8f405fe90ef22646-dirty"
author sblanck
date Mon, 12 Apr 2021 14:47:09 +0000
parents 4d539083cf7f
children
line wrap: on
line source

#!/usr/bin/env Rscript
# setup R error handling to go to stderr
options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )

# we need that to not crash galaxy with an UTF8 error on German LC settings.
# loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("optparse")

##### Read options
option_list=list(
		make_option("--input",type="character",default=NULL, dest="input"),
		make_option("--output",type="character",default=NULL, dest="output"),
		make_option("--new_file_path",type="character",default=NULL, dest="new_file_path"),
		make_option("--nbcall",type="character",default=NULL, dest="nbcall"),
		make_option("--outputgraph",type="character",default=NULL, dest="outputgraph"),
		make_option("--graph",type="character",default=NULL, dest="graph"),
		make_option("--signalType",type="character",default=NULL, dest="signalType"),
		make_option("--cellularity",type="character",default=NULL, dest="cellularity"),
		make_option("--outputlog",type="character",default=NULL, dest="outputlog"),
		make_option("--log",type="character",default=NULL, dest="log"),
		make_option("--user_id",type="character",default=NULL, dest="userid"),
		make_option("--method",type="character",default=NULL, dest="method")
);

opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

if(is.null(opt$input)){
	print_help(opt_parser)
	stop("input required.", call.=FALSE)
}

#loading libraries

input=opt$input
output=opt$output
tmp_dir=opt$new_file_path
nbcall=as.numeric(opt$nbcall)
outputgraph=type.convert(opt$outputgraph)
cellularity=as.numeric(opt$cellularity)
userId=opt$userid
method=opt$method
log=opt$log
outputlog=opt$outputlog
graph=opt$graph
signalType=opt$signalType

#args<-commandArgs(TRUE)
#
#input=args[1]
#tmp_dir=args[2]
#nbcall=as.numeric(args[3])
#cellularity=as.numeric(args[4]) 
#output=args[5]
#method=args[6]
#userId=args[7]
#signalType=args[8]

library(MPAgenomics)
workdir=file.path(tmp_dir)
if (!dir.exists(workdir))
  dir.create(workdir, showWarnings = TRUE, recursive = TRUE)
setwd(workdir)

workdir

if (outputlog){
	sinklog <- file(log, open = "wt")
	sink(sinklog ,type = "output")
	sink(sinklog, type = "message")
} 

CN=read.table(input,header=TRUE)
uniqueChr=unique(CN$chromosome)
drops=c("chromosome","position","probeName")
CNsignal=CN[,!(names(CN)%in% drops),drop=FALSE]

samples=names(CNsignal)

if (signalType=="CN")
{

result=data.frame(sampleNames=character(0),chrom=character(0),chromStart=numeric(0),chromEnd=numeric(0),probes=numeric(0),means=numeric(0),calls=character(0),stringsAsFactors=FALSE)

for (chr in uniqueChr)
{
currentSubset=subset(CN, chromosome==chr)
currentPositions=currentSubset["position"]
for (sample in samples) 
  {
    currentSignal=currentSubset[sample]
	if (length(which(!is.na(unlist(currentSignal))))>1)
	{
		currentSeg=segmentation(signal=unlist(currentSignal),position=unlist(currentPositions),method=method)
    	callobj= callingObject(copynumber=currentSeg$signal, segmented=currentSeg$segmented,chromosome=rep(chr,length(currentSeg$signal)), position=currentSeg$startPos,sampleNames=sample)
	    currentCall=callingProcess(callobj,nclass=nbcall,cellularity=cellularity,verbose=TRUE)
	    currentResult=currentCall$segment
	    if(outputgraph)
  		{    
    	
		currentPos=unlist(currentPositions)
		figName <- sprintf("%s,%s", sample, chr);
          	pathname <- file.path(sprintf("%s.png", figName));
          	png(filename = pathname, width = 1280, height = 480)
          	plot(NA,xlim=c(min(currentPos),max(currentPos)), ylim=c(0,6),xlab="Position", main=figName,ylab="CN", pch=".")
		points(currentPos, unlist(currentSignal), pch=".");
		for(i in 1:nrow(currentResult))
            		lines(c(currentResult$chromStart[i],currentResult$chromEnd[i]),rep(currentResult$means[i],2),col="red",lwd=3)
          	dev.off()	
			
 		 }	
	    
	    currentResult["sampleNames"]=c(rep(sample,length(currentCall$segment$chrom)))
	    result=rbind(result,currentResult)
	}
  }
}
finalResult=data.frame(sampleNames=result["sampleNames"],chrom=result["chrom"],chromStart=result["chromStart"],chromEnd=result["chromEnd"],probes=result["probes"],means=result["means"],calls=result["calls"],stringsAsFactors=FALSE)

write.table(finalResult,output,row.names = FALSE, quote=FALSE, sep = "\t")
} else {
	result=data.frame(sampleNames=character(0),chrom=character(0),start=numeric(0),end=numeric(0),points=numeric(0),means=numeric(0),stringsAsFactors=FALSE)
	
	for (chr in uniqueChr)
	{
		cat(paste0("chromosome ",chr,"\n"))
		currentSubset=subset(CN, chromosome==chr)
		currentPositions=currentSubset["position"]
		for (sample in samples) 
		{
			cat(paste0("  sample ",sample,"..."))
			currentSignal=currentSubset[sample]
			if (length(which(!is.na(unlist(currentSignal))))>1)
			{
				currentSeg=segmentation(signal=unlist(currentSignal),position=unlist(currentPositions),method=method)
				currentResult=currentSeg$segment
				currentResult["chrom"]=c(rep(chr,length(currentSeg$segment$means)))
				currentResult["sampleNames"]=c(rep(sample,length(currentSeg$segment$means)))
				result=rbind(result,currentResult)
				if(outputgraph)
	                	{

                		currentPos=unlist(currentPositions) 
                		figName <- sprintf("%s,%s", sample, chr);
                		pathname <- file.path(sprintf("%s.png", figName));
                		png(filename = pathname, width = 1280, height = 480)
                		plot(NA,xlim=c(min(currentPos),max(currentPos)), ylim=c(0,1),xlab="Position", main=figName,ylab="CN", pch=".")
                		points(currentPos, unlist(currentSignal), pch=".");
				print(currentResult)
				for(i in 1:nrow(currentResult))
                       			lines(c(currentResult$start[i],currentResult$end[i]),rep(currentResult$means[i],2),col="red",lwd=3)
                		dev.off()
                        
                 	}

	
	
			}
			cat(paste0("OK\n"))
		}
	}
	finalResult=data.frame(sampleNames=result["sampleNames"],chrom=result["chrom"],chromStart=result["start"],chromEnd=result["end"],probes=result["points"],means=result["means"],stringsAsFactors=FALSE)
	colnames(finalResult)=c("sampleNames","chrom","chromStart","chromEnd","probes","means")
	write.table(finalResult,output,row.names = FALSE, quote=FALSE, sep = "\t")
}

if (outputgraph){
	library(zip)
        files2zip <- dir(pattern=".png")
        zipr(graph, files = files2zip)

}

if (outputlog){
	sink(type="output")
	sink(type="message")
	close(sinklog)
}