Mercurial > repos > davidvanzessen > argalaxy_tools
diff report_clonality/RScript.r @ 38:b6936fb52ab9 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 19 Apr 2017 10:21:01 -0400 |
parents | f37e072affc0 |
children | 106275b54470 |
line wrap: on
line diff
--- a/report_clonality/RScript.r Wed Apr 19 08:05:01 2017 -0400 +++ b/report_clonality/RScript.r Wed Apr 19 10:21:01 2017 -0400 @@ -768,7 +768,7 @@ Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) @@ -787,7 +787,7 @@ Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) @@ -806,7 +806,7 @@ Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) @@ -825,7 +825,7 @@ Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) @@ -841,7 +841,7 @@ Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) @@ -855,13 +855,11 @@ Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) - print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) - newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), P1=mean(.SD$P3V.nt.nb, na.rm=T), @@ -871,12 +869,11 @@ Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) - print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), @@ -887,7 +884,7 @@ Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), + Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) @@ -927,7 +924,6 @@ ggsave("DReadingFrame.pdf", D.REGION.reading.frame) - # ---------------------- AA composition in CDR3 ---------------------- AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] @@ -977,6 +973,7 @@ #generate the "Sequences that are present in more than one replicate" dataset clonaltype.in.replicates = inputdata +clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),] clonaltype.in.replicates = na.omit(clonaltype.in.replicates) clonaltype = unlist(strsplit(clonaltype, ",")) @@ -1023,5 +1020,3 @@ - -