annotate gfapts/inc/R/samvcf_data_parser.R @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
# Script setup: silence warnings, read the input path from the command line
# and derive the output path from it.
#
# The former `rm(list = ls())` was removed: it has no effect in a fresh batch
# session and would wipe the caller's workspace if this file were sourced.

# Warnings stay globally suppressed for the rest of the run, as before.
options(warn = -1)

# Return the user-supplied command-line arguments.
#
#   all_args  full argument vector, as returned by commandArgs()
#   trailing  arguments following `--args`, as returned by
#             commandArgs(trailingOnly = TRUE)
#
# Arguments after `--args` are preferred because their position does not
# depend on how many interpreter flags precede them (the old code assumed
# exactly four leading elements). When nothing follows `--args`, the
# historical rule of dropping the first four elements is used as a fallback.
script_args <- function(all_args = commandArgs(),
                        trailing = commandArgs(trailingOnly = TRUE)) {
  if (length(trailing) > 0L) {
    return(trailing)
  }
  all_args[-(1:4)]
}

args <- script_args()
infile <- args[1]

# Replace a trailing "Temp" + any one character + "2R" with "var".
# NOTE(review): the "." in the pattern is an unescaped regex wildcard, and when
# the pattern does not match, outfile is identical to infile, so a later write
# to outfile would overwrite the input -- confirm callers always pass a path
# ending in "Temp.2R".
outfile <- sub("Temp.2R$", "var", infile)
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
6
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
# Load the header-less, tab-separated input file into the data frame `x`.
# Twelve columns are expected; each one is given an explicit name and class.
x <- read.table(
  file = infile,
  header = FALSE,
  sep = "\t",
  row.names = NULL,
  col.names = c(
    "chr", "start", "end", "ref", "alt", "zyg",
    "QC", "NRF", "NRR", "NAF", "NAR", "VCF.FILTER"
  ),
  colClasses = c(
    "factor",                                               # chr
    "integer", "integer",                                   # start, end
    "factor", "factor", "factor",                           # ref, alt, zyg
    "integer", "integer", "integer", "integer", "integer",  # QC, NRF, NRR, NAF, NAR
    "factor"                                                # VCF.FILTER
  )
)
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
9
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
# Append three binomial-test p-value columns to `x`, one value per row:
#   p.strand  (NRF + NAF) versus (NRR + NAR)
#   p.ref     NRF versus NRR
#   p.alt     NAF versus NAR
# (presumably forward/reverse read counts for the reference and alternate
# alleles -- TODO confirm against the producer of the input file).
#
# Each p-value comes from binom.test() with its defaults on the pair of
# counts. seq_len()/vapply() keep this working for a table with zero rows,
# where the former 1:nrow(x) iterated over c(1, 0) and aborted in binom.test().

# p-value of binom.test() for one pair of counts.
pair_p_value <- function(first, second) {
  binom.test(c(first, second))$p.value
}

# 3 x nrow(x) matrix: one column of (p.strand, p.ref, p.alt) per input row.
p_raw <- vapply(
  seq_len(nrow(x)),
  function(i) {
    c(
      pair_p_value(x$NRF[i] + x$NAF[i], x$NRR[i] + x$NAR[i]),
      pair_p_value(x$NRF[i], x$NRR[i]),
      pair_p_value(x$NAF[i], x$NAR[i])
    )
  },
  numeric(3)
)

# Round by a text round-trip through scientific notation with 3 digits,
# exactly as before, so the stored values are unchanged.
p_rounded <- as.numeric(format(p_raw, digits = 3, scientific = TRUE))

# The values are laid out row after row (three per input row), hence byrow.
x <- cbind(
  x,
  matrix(
    p_rounded,
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c("p.strand", "p.ref", "p.alt"))
  )
)
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
17
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
# Append the derived columns DP, AD, AF and the PASS/SKIP call VAR.FILTER
# to `x`, in that order.
#
#   AD          NAF + NAR
#   DP          NRF + NRR + AD
#   AF          AD / DP, rounded to 3 significant digits
#   VAR.FILTER  "PASS" when every condition below holds, otherwise "SKIP"
#               (NA when the conditions cannot be decided)
#
# The computation is vectorised over all rows. Unlike the former per-row
# lapply() over 1:nrow(x), it also works for a table with zero rows, and it
# no longer relies on an assignment nested inside the arguments of cbind().
alt_depth <- x$NAF + x$NAR
total_depth <- x$NRF + x$NRR + alt_depth

# Each balance test is waived when the smaller of the two compared counts
# is at least 10.
strand_ok <- x$p.strand > .05 | pmin(x$NRF + x$NAF, x$NRR + x$NAR) >= 10
ref_ok <- x$p.ref > .05 | pmin(x$NRF, x$NRR) >= 10
alt_ok <- x$p.alt > .05 | pmin(x$NAF, x$NAR) >= 10

passes <- x$zyg != "unk" & x$QC >= 20 & total_depth >= 10 & alt_depth >= 5 &
  strand_ok & ref_ok & alt_ok

# DP and AD are stored as doubles, as they were when they came out of a
# numeric matrix, so any later text formatting of these columns is unchanged.
x$DP <- as.numeric(total_depth)
x$AD <- as.numeric(alt_depth)
x$AF <- signif(alt_depth / total_depth, digits = 3)
x$VAR.FILTER <- factor(
  as.numeric(passes),
  levels = c(0, 1),
  labels = c("SKIP", "PASS")
)
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
25
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
# Write the table to `outfile` as tab-separated text, with no header line,
# no row names and no quoting, then end the R session.
# NOTE(review): the data frame is converted with as.matrix() before writing;
# this conversion determines how the values are rendered as text, so keep it
# unless the consumers of the output file have been checked.
write.table(
  as.matrix(x),
  file = outfile,
  sep = "\t",
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE
)
# runLast = FALSE: leave without running a .Last() hook.
quit(runLast = FALSE)