Mercurial > repos > davidvanzessen > shm_csr
diff baseline/filter.r @ 0:c33d93683a09 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 13 Oct 2016 10:52:24 -0400 |
parents | |
children | 8728284105ee |
line wrap: on
line diff
# baseline/filter.r
#
# Usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
#
#   summaryfile  tab-separated table providing the AA.JUNCTION column
#   gappedfile   tab-separated table providing the V/D/J "GENE.and.allele"
#                columns and the Functionality column
#   selection    comma-separated column names used as the de-duplication key
#   output       path of the tab-separated file to write
#
# The script appends AA.JUNCTION from the summary table to the gapped table
# (row by row), derives VGene/DGene/JGene, keeps the first row for every
# distinct combination of the selected columns, drops rows whose
# Functionality is "No results" or "unproductive", and writes the result.

arg <- commandArgs(TRUE)
if (length(arg) < 4) {
  stop(
    "usage: filter.r <summaryfile> <gappedfile> <selection> <output>",
    call. = FALSE
  )
}

summaryfile <- arg[1]
gappedfile <- arg[2]
selection <- arg[3]
output <- arg[4]
print(paste("selection = ", selection))

# Read a tab-separated file with a header row. Quote and comment handling are
# switched off so that a stray ', " or # inside a field cannot merge or
# truncate rows (which fill = TRUE would otherwise hide).
read_tsv <- function(path) {
  read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    stringsAsFactors = FALSE,
    quote = "",
    comment.char = ""
  )
}

# Remove a leading "Homsap " and everything from the first "*" onwards.
strip_gene_name <- function(x) {
  sub("[*].*", "", sub("^Homsap ", "", x))
}

# Stop with a readable message when `df` lacks any of the `required` columns.
require_columns <- function(df, required, label) {
  absent <- setdiff(required, colnames(df))
  if (length(absent) > 0) {
    stop(
      label, " is missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(df)
}

summarydat <- read_tsv(summaryfile)
gappeddat <- read_tsv(gappedfile)

require_columns(summarydat, "AA.JUNCTION", "summary file")
require_columns(
  gappeddat,
  c(
    "V.GENE.and.allele", "D.GENE.and.allele", "J.GENE.and.allele",
    "Functionality"
  ),
  "gapped file"
)

# The two tables are combined by row position, so they must line up exactly;
# without this check a shorter summary column could be silently recycled.
if (nrow(summarydat) != nrow(gappeddat)) {
  stop(
    "summary file has ", nrow(summarydat), " rows but gapped file has ",
    nrow(gappeddat), " rows; cannot combine them by position",
    call. = FALSE
  )
}

# dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))

# Append by name rather than renaming "the last column" afterwards, which
# would relabel an unrelated column if nothing had actually been appended.
dat <- cbind(gappeddat, AA.JUNCTION = summarydat[["AA.JUNCTION"]])

dat$VGene <- strip_gene_name(dat[["V.GENE.and.allele"]])
dat$DGene <- strip_gene_name(dat[["D.GENE.and.allele"]])
dat$JGene <- strip_gene_name(dat[["J.GENE.and.allele"]])

# print(str(dat))

selection_cols <- unlist(strsplit(selection, ","))
if (length(selection_cols) == 0) {
  stop("selection must name at least one column", call. = FALSE)
}
require_columns(dat, selection_cols, "selection")

# Build one "a:b:c" key per row from the selected columns. The list is
# unnamed so that column names can never be taken for paste() arguments.
dat$past <- do.call(
  paste,
  c(unname(as.list(dat[selection_cols])), sep = ":")
)

# NOTE(review): de-duplication runs before the functionality filter, so a
# productive row is lost when an earlier row with the same key is
# unproductive. Kept as in the original; confirm this order is intended.
dat <- dat[!duplicated(dat$past), ]

# %in% never yields NA, so a row with a missing Functionality is kept intact
# instead of being turned into an all-NA row by logical NA indexing.
rejected <- c("No results", "unproductive")
dat <- dat[!(dat[["Functionality"]] %in% rejected), ]

write.table(
  x = dat,
  file = output,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)