view Dotplot_Release/Step2_data_filtering.R @ 0:dfa3436beb67 draft

Uploaded
author bornea
date Fri, 29 Jan 2016 09:56:02 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env Rscript

# Step 2: filter preys by how many counts exceed a cutoff, then rescale.
#
# Usage:
#   Rscript Step2_data_filtering.R <input_table> <ss_cutoff> <output_prefix>
#
#   input_table    Tab-delimited table with a header line, read by
#                  read.delim(). Every column is treated as a numeric count
#                  (presumably one column per bait -- confirm against the
#                  upstream step). The row names assigned by read.delim()
#                  identify the preys.
#   ss_cutoff      Numeric threshold; a count only qualifies when it is
#                  strictly greater than this value.
#   output_prefix  The filtered table is written to "<output_prefix>.dat".
#
# Outputs:
#   <output_prefix>.dat  Rows with at least 2 counts above ss_cutoff, rescaled
#                        so the largest retained count equals 10, with the row
#                        names in a leading PROT column (tab-separated).
#   singletons.txt       Row names of every row that failed the filter
#                        (written to the current working directory).

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3) {
  stop(
    "Usage: Step2_data_filtering.R <input_table> <ss_cutoff> <output_prefix>",
    call. = FALSE
  )
}

ss_cutoff <- as.numeric(args[2])
if (is.na(ss_cutoff)) {
  # An NA cutoff would turn every comparison below into NA.
  stop("ss_cutoff must be numeric, got: ", args[2], call. = FALSE)
}

d <- read.delim(args[1], header = TRUE, as.is = TRUE)

# Work on a matrix so the comparison and the maximum are computed in one pass.
counts <- as.matrix(d)
if (!is.numeric(counts) && !is.logical(counts)) {
  stop("All columns of ", args[1], " must be numeric counts", call. = FALSE)
}

### Here I'm only going to take the preys which appeared in at least 2 baits
### with > ss_cutoff counts. Missing counts are treated as not exceeding the
### cutoff, so they can never produce NA rows in the outputs.
keep <- rowSums(counts > ss_cutoff, na.rm = TRUE) >= 2

# drop = FALSE keeps a data frame (and its row names) even for one column.
d2 <- d[keep, , drop = FALSE]
# Zero columns on purpose: only the row names of the rejected preys are kept.
d2s <- d[!keep, 0, drop = FALSE]

if (nrow(d2) > 0) {
  # Rescale so that the largest retained count becomes 10.
  max_d2 <- max(counts[keep, , drop = FALSE], na.rm = TRUE)
  d2 <- d2 / max_d2 * 10
} else {
  warning(
    "No row has at least 2 counts above ss_cutoff; writing an empty table",
    call. = FALSE
  )
}

d3 <- data.frame(PROT = rownames(d2), d2)

outfile <- paste0(args[3], ".dat")

### The following file is the outcome of running this step.
### This is the final input file for nested cluster algorithm
write.table(d3, outfile, sep = "\t", quote = FALSE, row.names = FALSE)

write.table(d2s, "singletons.txt", quote = FALSE)