Mercurial > repos > davidvanzessen > shm_csr
comparison baseline/filter.r @ 63:8728284105ee draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 06 Dec 2017 08:04:52 -0500 |
parents | c33d93683a09 |
children | c6dd3215ebe0 |
comparison
equal
deleted
inserted
replaced
62:aa8d37bd1930 | 63:8728284105ee |
---|---|
6 print(paste("selection = ", selection)) | 6 print(paste("selection = ", selection)) |
7 | 7 |
8 | 8 |
9 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) | 9 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) |
10 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) | 10 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) |
| | 11 |
| | 12 fix_column_names = function(df){ |
| | 13 if("V.DOMAIN.Functionality" %in% names(df)){ |
| | 14 names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" |
| | 15 print("found V.DOMAIN.Functionality, changed") |
| | 16 } |
| | 17 if("V.DOMAIN.Functionality.comment" %in% names(df)){ |
| | 18 names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" |
| | 19 print("found V.DOMAIN.Functionality.comment, changed") |
| | 20 } |
| | 21 return(df) |
| | 22 } |
| | 23 |
| | 24 gappeddat = fix_column_names(gappeddat) |
11 | 25 |
12 #dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) | 26 #dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) |
13 | 27 |
14 dat = cbind(gappeddat, summarydat$AA.JUNCTION) | 28 dat = cbind(gappeddat, summarydat$AA.JUNCTION) |
15 | 29 |
22 dat$DGene = gsub("[*].*", "", dat$DGene) | 36 dat$DGene = gsub("[*].*", "", dat$DGene) |
23 | 37 |
24 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) | 38 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) |
25 dat$JGene = gsub("[*].*", "", dat$JGene) | 39 dat$JGene = gsub("[*].*", "", dat$JGene) |
26 | 40 |
27 #print(str(dat)) | 41 print(str(dat)) |
28 | 42 |
29 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) | 43 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) |
30 | 44 |
31 dat = dat[!duplicated(dat$past), ] | 45 dat = dat[!duplicated(dat$past), ] |
32 | 46 |
| | 47 print(paste("Sequences remaining after duplicate filter:", nrow(dat))) |
| | 48 |
33 dat = dat[dat$Functionality != "No results" & dat$Functionality != "unproductive",] | 49 dat = dat[dat$Functionality != "No results" & dat$Functionality != "unproductive",] |
34 | 50 |
| | 51 print(paste("Sequences remaining after functionality filter:", nrow(dat))) |
| | 52 |
| | 53 print(paste("Sequences remaining:", nrow(dat))) |
| | 54 |
35 write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T) | 55 write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T) |