annotate my_VDM_tool.R @ 3:44e4f5bfebde draft default tip

Uploaded
author fxce
date Mon, 16 Nov 2020 13:59:53 +0000
parents 472ea11c2b25
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
1 #!/usr/bin/env Rscript
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
2 # rm(list=ls())
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
3
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
4 #Rscript my_VDM_tool.R --inf "nor22.vcf" --itype "C.elegans" --qual 200 --allr "AB" --snp TRUE --lsp 0.4 --pcol "black" --lcol "red" --xstand TRUE --bsize 1000000 --freqthr 0.0-1.0 --bnorm FALSE --exclf "FALSE" --exclcol "green" --outn "test-output.txt" --pdfn "test-plot.pdf"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
5
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
6 library("getopt")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
7 #input from trailing line arguments
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
8 args <- commandArgs(trailingOnly = TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
9 #read the options from input commandArgs
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
10 option_specification = matrix(c(
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
11 'inf','in',1,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
12 'itype','i',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
13 'qual','q',2,'double',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
14 'allr','a',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
15 'snp','s',2,'logical',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
16 'lsp','l',2,'double',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
17 'pcol','pc',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
18 'lcol','lc',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
19 'xstand','x',2,'logical',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
20 'bsize','b',2,'integer',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
21 'freqthr','ft',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
22 'bnorm','n',2,'logical',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
23 'exclf','ef',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
24 'exclcol','ec',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
25 'outn','o',2,'character',
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
26 'pdfn','p',2,'character'
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
27 ), byrow=TRUE, ncol=4)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
28 options = getopt(option_specification)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
29
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
30 myfunction<-function(inf,itype,qual,allr,snp,lsp,pcol,lcol,xstand,bsize,freqthr,bnorm,exclf,exclcol,outn,pdfn){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
31 #input file
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
32 filename<-inf
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
33 #choose species for chrom config
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
34 interval_type<-itype
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
35 #quality filter
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
36 read_qual<-as.numeric(qual)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
37 #allele ratio from AB or AO/(AO+RO)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
38 allele_ratio<-allr
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
39 #snp only or include all variant types
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
40 snp_only<-snp
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
41 #loess span
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
42 loess_span<-as.numeric(lsp)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
43 #scatterplot point colour
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
44 plot_color<-pcol
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
45 #loess curve colour
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
46 loess_color<-lcol
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
47 #uniform y-axis scaling
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
48 xaxis_standard<-xstand
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
49 #binsize
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
50 bin_size<-as.numeric(bsize)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
51
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
52 #allele ratio greater or equal to this value considered hom ALT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
53 threshold_upper<-as.numeric(gsub(".*-","",freqthr))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
54 #allele ratio less than or equal to this value considered hom REF
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
55 threshold_lower<-as.numeric(gsub("-.*","",freqthr))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
56
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
57 #normalize frequency barplot
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
58 bfreq_norm<-bnorm
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
59 #filenames for variant subtraction
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
60 exclusion_list<-exclf
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
61 #pre-subtraction loess curve colour
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
62 excl_loess_color<-exclcol
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
63 #filename for ouput table
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
64 vcfoutput_filename<-outn
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
65 #filename for pdf
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
66 pdf_filename<-pdfn
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
67
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
68 #transparency for selected colour (to see plot points underneath)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
69 plot_color<-rgb(c(col2rgb(plot_color)[1]),c(col2rgb(plot_color)[2]),c(col2rgb(plot_color)[3]),max=255,alpha=150)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
70 loess_color<-rgb(c(col2rgb(loess_color)[1]),c(col2rgb(loess_color)[2]),c(col2rgb(loess_color)[3]),max=255,alpha=150)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
71 excl_loess_color<-rgb(c(col2rgb(excl_loess_color)[1]),c(col2rgb(excl_loess_color)[2]),c(col2rgb(excl_loess_color)[3]),max=255,alpha=150)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
72
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
73 ###FIXED PARAMETERS
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
74 #chromosome intervals in Mb rather than custom
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
75 interval_unit<-1000000
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
76 #linkage scatter plot yaxis max value=1
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
77 sp_yaxis<-1
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
78
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
79 #only use variants with allele ratio in this range for subtraction (dependent on AB/ratio option)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
80 excl_rthreshold_upper<-1
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
81 excl_rthreshold_lower<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
82 #filename for parsed original vcf
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
83 vcfparsed_filename<-"parsed.txt"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
84
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
85 fastmode<-"TRUE"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
86
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
87 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
88 ###READ IN VCF FILE
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
89 #extract column names
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
90 vcf_readin<-readLines(filename)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
91 #find header line, i.e. last line to begin with #
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
92 for(l in 1:length(vcf_readin)){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
93 vcf_readinl<-vcf_readin[l]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
94 if(substr(vcf_readinl,1,1)=="#"){next}
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
95 else if(substr(vcf_readinl,1,1)!="#"){rowline<-l-1;break}
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
96 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
97 vcf_header<-vcf_readin[rowline]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
98 #e.g. CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\trgSM"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
99 vcf_header<-gsub("#","",vcf_header)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
100 vcf_colnames<-unlist(strsplit(vcf_header,"\t"))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
101
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
102 #extract data (hashed vcf header skipped with read.table)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
103 vcf_rtable<-read.table(filename,sep="\t",stringsAsFactors=FALSE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
104 names(vcf_rtable)<-vcf_colnames
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
105
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
106 if(fastmode=="TRUE"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
107 #to speed up runtime- quality filter here ny skipping parsing of full vcf (parsed output file incomplete)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
108 vcf_rtable<-subset(vcf_rtable,QUAL>=read_qual)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
109 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
110
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
111
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
112 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
113 ###PREPARE DATA
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
114 vcfinfo_dat<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
115 multiallele_counter<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
116 diviserror_counter<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
117 for(i in c(1:nrow(vcf_rtable))){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
118 vcf_line<-vcf_rtable[i,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
119 #remove chrom or chr prefix from chromosome value
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
120 if(grepl("chrom",vcf_line$CHROM,ignore.case=TRUE)==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
121 vcf_line$CHROM<-gsub("chrom","",vcf_line$CHROM,ignore.case=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
122 }else if(grepl("chr",vcf_line$CHROM,ignore.case=TRUE)==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
123 vcf_line$CHROM<-gsub("chr","",vcf_line$CHROM,ignore.case=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
124 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
125 #PARSE INFO
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
126 vcfinfo_split<-strsplit(vcf_line$INFO,split=";")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
127 vcfinfo_coln<-gsub("=.*","",unlist(vcfinfo_split))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
128 vcfinfo_cold<-gsub(".*=","",unlist(vcfinfo_split))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
129 vcfinfo_ldat<-data.frame(t(vcfinfo_cold),stringsAsFactors=FALSE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
130 names(vcfinfo_ldat)<-vcfinfo_coln
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
131
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
132 #skip if commas in values to avoid returning errors
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
133 if(grepl(",",vcfinfo_ldat$AO)==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
134 multiallele_counter<-multiallele_counter+1
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
135 next
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
136 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
137 #skip divide by zero errors (under "ratio" setting for ratio calculation)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
138 if(as.numeric(vcfinfo_ldat$AO)+as.numeric(vcfinfo_ldat$RO)=="0"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
139 diviserror_counter<-diviserror_counter+1
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
140 next
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
141 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
142
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
143 #specific accounting for nonstandard categories
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
144 #LOF columns only present for loss-of-function variants + assign NA values to all other variants
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
145 if(("LOF" %in% names(vcfinfo_ldat))==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
146 LOF<-vcfinfo_ldat$LOF
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
147 vcfinfo_ldat<-vcfinfo_ldat[,!names(vcfinfo_ldat) %in% "LOF"]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
148 vcfinfo_ldat<-cbind(vcfinfo_ldat,LOF)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
149 }else{
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
150 LOF<-"NA"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
151 vcfinfo_ldat<-cbind(vcfinfo_ldat,LOF)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
152 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
153 #NMD columns only present for nonsense-mediated-decay variants + assign NA values to all other variants
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
154 if(("NMD" %in% names(vcfinfo_ldat))==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
155 NMD<-vcfinfo_ldat$NMD
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
156 vcfinfo_ldat<-vcfinfo_ldat[,!names(vcfinfo_ldat) %in% "NMD"]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
157 vcfinfo_ldat<-cbind(vcfinfo_ldat,NMD)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
158 }else{
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
159 NMD<-"NA"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
160 vcfinfo_ldat<-cbind(vcfinfo_ldat,NMD)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
161 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
162 #PARSE ANNOTATION
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
163 ann_rparsed<-unlist(strsplit(vcfinfo_ldat$ANN[1],split="\\|"))[1:20]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
164 ann_rparsed[ann_rparsed==""]<-"novalue"
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
165 ann_parsed<-data.frame(t(ann_rparsed),stringsAsFactors=FALSE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
166 names(ann_parsed)<-paste("ANN",c(1:dim(ann_parsed)[2]),sep="")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
167 #remove duplicate redundant INFO column (fully parsed)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
168 vcf_line<-vcf_line[,names(vcf_line)!="INFO"]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
169
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
170 #dataset keeping parsed annotation (partial parsed-for relevant)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
171 vcfinfo_ldat<-vcfinfo_ldat[,names(vcfinfo_ldat)!="ANN"]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
172 #append copy of original data to parsed data
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
173 vcfinfo_lldat<-cbind(vcf_line,vcfinfo_ldat,ann_parsed)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
174 vcfinfo_dat<-rbind(vcfinfo_dat,vcfinfo_lldat)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
175
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
176 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
177
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
178 print(paste("rows with multiple alleles skipped: ",multiallele_counter,sep=""))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
179 # print(paste("rows with AO+RO=0 (not multiple alleles) skipped: ",diviserror_counter,sep=""))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
180
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
181 #ENSURE CORRECT DATATYPES
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
182 #convert dataframe columns of factor type to character type
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
183 vcfinfo_dat<-data.frame(lapply(vcfinfo_dat,as.character),stringsAsFactors=FALSE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
184 #convert to numeric if column is numeric and not string
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
185 for(n in c(1:dim(vcfinfo_dat)[2])){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
186 #suppress warnings when columns with strings encountered (not converted)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
187 suppressWarnings(
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
188 colnum_index<-!is.na(as.numeric(vcfinfo_dat[,n]))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
189 )
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
190 if(all(colnum_index)==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
191 vcfinfo_dat[,n]<-as.numeric(vcfinfo_dat[,n])
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
192 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
193 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
194
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
195
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
196 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
197 #RATIO CALCULATION
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
198 #ratio calculation from AO and RO
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
199 RATIO<-c(vcfinfo_dat$AO/(vcfinfo_dat$AO+vcfinfo_dat$RO))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
200 #add adj_AB for AB=0->AO=1 conversion
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
201 adj_AB<-replace(vcfinfo_dat$AB,vcfinfo_dat$AB==0,1)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
202 vcfinfo_dat<-cbind(vcfinfo_dat,RATIO,adj_AB)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
203 vcfinfo_dat<-vcfinfo_dat[with(vcfinfo_dat,order(CHROM,POS)),]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
204
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
205 vcfinfo_pdat<-vcfinfo_dat
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
206 vcfinfo_dat<-subset(vcfinfo_dat,QUAL>=read_qual)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
207
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
208 #CONSIDER ONLY SNP VARIANTS
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
209 if(snp_only==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
210 vcfinfo_dat<-subset(vcfinfo_dat,CIGAR=="1X")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
211 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
212
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
213 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
214 #SUBTRACT VARIANTS FROM EXCLUSION LIST
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
215 if(exclusion_list!="FALSE"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
216 #keep copy of pre-subtraction data for later plotting
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
217 vcfinfo_origdat<-vcfinfo_dat
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
218
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
219 #identifiers for exclusion list based on CHROM/POS/REF/ALT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
220 index1<-paste(vcfinfo_dat$CHROM,vcfinfo_dat$POS,sep="_")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
221 index2<-paste(vcfinfo_dat$CHROM,vcfinfo_dat$POS,vcfinfo_dat$REF,sep="_")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
222 index3<-paste(vcfinfo_dat$CHROM,vcfinfo_dat$POS,vcfinfo_dat$REF,vcfinfo_dat$ALT,sep="_")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
223 vcfinfo_dat<-cbind(vcfinfo_dat,index1,index2,index3)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
224
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
225 print(paste("before subtraction: ",nrow(vcfinfo_dat),sep=""))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
226 #loop and subtract through exclusion lists (if multiple files)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
227 for(exclusion_ind in exclusion_list){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
228 exclin<-read.table(exclusion_ind,header=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
229
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
230 #THRESHOLD FILTER ON EXCLUSION LIST VARIANTS
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
231 if(allele_ratio=="AB"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
232 exclin<-subset(exclin,adj_AB>=excl_rthreshold_lower)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
233 exclin<-subset(exclin,adj_AB<=excl_rthreshold_upper)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
234 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
235 if(allele_ratio=="ratio"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
236 exclin<-subset(exclin,ratio>=excl_rthreshold_lower)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
237 exclin<-subset(exclin,ratio>=excl_rthreshold_upper)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
238 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
239
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
240 #identifiers for vcf data based on CHROM/POS/REF/ALT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
241 index1<-paste(exclin$CHROM,exclin$POS,sep="_")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
242 index2<-paste(exclin$CHROM,exclin$POS,exclin$REF,sep="_")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
243 index3<-paste(exclin$CHROM,exclin$POS,exclin$REF,exclin$ALT,sep="_")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
244 exclin<-cbind(exclin,index1,index2,index3)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
245 #exclude based on CHROM+POS+REF+ALT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
246 vcfinfo_dat<-subset(vcfinfo_dat,!(index3 %in% exclin$index3))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
247 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
248 print(paste("after subtraction: ",nrow(vcfinfo_dat),sep=""))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
249 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
250
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
251
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
252 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
253 #WRITE TO OUTPUT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
254 #select relevant columns 2 variant type; 4 gene; !5 wormbase ID; 8 type change; 10 nucleotide change; 11 amino acid change; 16 warning message
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
255 vcfinfo_simp<-subset(vcfinfo_dat,select=c("CHROM","POS","QUAL","DP","REF","ALT","AB","AO","RO","RATIO","adj_AB","ANN2","ANN4","ANN8","ANN10","ANN11","ANN16"))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
256 names(vcfinfo_simp)<-c("CHROM","POS","QUAL","DP","REF","ALT","AB","AO","RO","RATIO","adj_AB","VARTYPE","GENE","TYPE","NTCHANGE","PRCHANGE","WARNINGS")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
257 vcfsimp_dat<-vcfinfo_simp[with(vcfinfo_simp,order(CHROM,POS)),]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
258
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
259 #write table with quality filtered variants for VDM plotting and relevant columns
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
260 try(
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
261 write.table(vcfsimp_dat,vcfoutput_filename,sep="\t",quote=FALSE,row.names=FALSE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
262 ,silent=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
263 # #write table with all unfiltered variants and all columns including parsed INFO
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
264 # if(fastmode!="TRUE"){}
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
265 # try(
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
266 # write.table(vcfinfo_pdat,vcfparsed_filename,sep="\t",quote=FALSE,row.names=FALSE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
267 # ,silent=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
268 # }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
269
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
270 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
271 ###CHROMOSOME (INTERVAL) ARRANGEMENT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
272 #define chromosome and chromosome size in Mb
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
273 if(interval_type == 'C.elegans'){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
274 chrom_n<-c('I','II','III','IV','V','X')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
275 chrom_mb<-c(16,16,14,18,21,18)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
276 interval_frame<-data.frame(chrom_n,chrom_mb)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
277 } else if(interval_type == 'Zebrafish'){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
278 chrom_n<-c('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
279 chrom_mb<-c(61,61,64,63,76,60,78,57,59,47,47,51,55,54,48,59,54,50,51,56,45,43,47,44,39)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
280 interval_frame<-data.frame(chrom_n,chrom_mb)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
281 } else if(interval_type == 'Brachypodium'){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
282 chrom_n<-c('1','2','3','4','5')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
283 chrom_mb<-c(75,60,60,50,30)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
284 interval_frame<-data.frame(chrom_n,chrom_mb)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
285 } else if(interval_type == 'Arabidopsis'){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
286 chrom_n<-c('1','2','3','4','5')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
287 chrom_mb<-c(31, 20,24,19,27 )
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
288 interval_frame<-data.frame(chrom_n,chrom_mb)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
289 } else{
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
290 #user interval file- no headers, with chromosome in column 1 (format CHR# or CHROM#) and size in Mb (rounded up) in column 2
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
291 user_interval_type<-read.table(interval_type)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
292 if(grepl("chrom",user_interval_type[1,1],ignore.case=TRUE)==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
293 user_interval_type[,1]<-gsub("chrom","",user_interval_type[,1],ignore.case=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
294 }else if(grep("chr",user_interval_type[1,1],ignore.case=TRUE)==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
295 user_interval_type[,1]<-gsub("chr","",user_interval_type[,1],ignore.case=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
296 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
297 chrom_n<-user_interval_type[,1]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
298 chrom_mb<-user_interval_type[,2]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
299 interval_frame<-data.frame(chrom_n,chrom_mb)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
300 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
301 names(interval_frame)<-c("CHROM","INTERVAL")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
302
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
303
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
304 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
305 ###PLOTTING
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
306 #VDM SCATTER PLOT
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
307 #save to pdf
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
308 pdf(file=pdf_filename,width=9,height=8)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
309 #par(mfrow=c(2,3))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
310 for(chromind in interval_frame$CHROM){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
311 #subset by data by chromosome for plotting
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
312 intervalind<-interval_frame$INTERVAL[interval_frame$CHROM==chromind]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
313 chr_dat<-subset(vcfsimp_dat,CHROM==chromind,silent=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
314
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
315 #same subsetting by chromosome for pre-subtraction data
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
316 if(exclusion_list!="FALSE"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
317 chr_origdat<-subset(vcfinfo_origdat,CHROM==chromind,silent=TRUE)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
318 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
319
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
320 #define x-axis upper limit
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
321 if(xaxis_standard==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
322 #for standardized x-axis (max x-axis chromosome length)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
323 scupper_xaxis<-max(interval_frame$INTERVAL)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
324 scupper_xval<-scupper_xaxis*interval_unit
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
325 } else if(xaxis_standard==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
326 scupper_xaxis<-intervalind
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
327 scupper_xval<-intervalind*interval_unit
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
328 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
329
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
330 if(allele_ratio=="AB"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
331 plot(chr_dat$POS,chr_dat$adj_AB,cex=0.60,xlim=c(0,scupper_xval),ylim=c(0,sp_yaxis),main=paste("Chr",chromind," Variant Discovery Mapping",sep=""),xlab="Position along Chromosome (in Mb)",ylab='Ratio of Variant Reads/Total Reads [AB]',pch=10, col=plot_color,xaxt='n')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
332 try(lines(loess.smooth(chr_dat$POS,chr_dat$adj_AB,span=loess_span),lwd=5,col=loess_color))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
333 #plot loess curve for data without subtraction of exclusion variants
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
334 if(exclusion_list!="FALSE"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
335 try(lines(loess.smooth(chr_origdat$POS,chr_origdat$adj_AB,span=loess_span),lty="longdash",lwd=4,col=excl_loess_color))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
336 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
337
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
338 axis(1,at=seq(0,scupper_xval,by=interval_unit),labels=c(0:scupper_xaxis))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
339 abline(h=seq(0,sp_yaxis,by=0.1),v=c(1:scupper_xaxis)*interval_unit,col="gray")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
340 } else if(allele_ratio=="ratio"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
341 plot(chr_dat$POS,chr_dat$RATIO,cex=0.60,xlim=c(0,scupper_xval),ylim=c(0,sp_yaxis),main=paste("Chr",chromind," Variant Discovery Mapping",sep=""),xlab="Position along Chromosome (in Mb)",ylab='Ratio of Variant Reads/Total Reads [ratio]',pch=10, col=plot_color,xaxt='n')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
342 try(lines(loess.smooth(chr_dat$POS,chr_dat$RATIO,span=loess_span),lwd=5,col=loess_color))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
343 #plot loess curve for data without subtraction of exclusion variants
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
344 if(exclusion_list!="FALSE"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
345 try(lines(loess.smooth(chr_origdat$POS,chr_origdat$adj_AB,span=loess_span),lty="longdash",lwd=4,col=excl_loess_color))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
346 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
347 axis(1,at=seq(0,scupper_xval,by=interval_unit),labels=c(0:scupper_xaxis))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
348 abline(h=seq(0,sp_yaxis,by=0.1),v=c(1:scupper_xaxis)*interval_unit,col="gray")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
349 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
350 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
351
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
352
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
353 ######################
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
354 #graph barplots
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
355 location_index<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
356 meanSNP_dat<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
357 # prepare table of counts and calculations
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
358 for(chromind in interval_frame$CHROM){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
359 #for standardized x-axis
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
360 if(xaxis_standard==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
361 intervalind<-max(interval_frame$INTERVAL)*1000000/bin_size
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
362 } else if(xaxis_standard==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
363 intervalind<-interval_frame$INTERVAL[interval_frame$CHROM==chromind]*1000000/bin_size
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
364 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
365 #start intervals with **1 and end with **0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
366 interval_begin<-c(((0:(intervalind-1))*bin_size)+1)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
367 interval_end<-c((1:intervalind)*bin_size)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
368
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
369 #define x-axis upper limit
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
370 if(xaxis_standard==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
371 upper_xaxis<-max(interval_frame$INTERVAL)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
372 } else if(xaxis_standard==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
373 upper_xaxis<-interval_frame$INTERVAL[interval_frame$CHROM==chromind]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
374 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
375 #prepare columns
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
376 snp_counter<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
377 purealt_counter<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
378 pureref_counter<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
379 het_counter<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
380 chr_mean<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
381 normed_freq<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
382 ratio<-0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
383
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
384 interval_index<-data.frame(chromind,interval_begin,interval_end,snp_counter,purealt_counter,pureref_counter,het_counter,chr_mean,normed_freq)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
385 chr_dat<-subset(vcfinfo_dat,CHROM==chromind)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
386 #ratio calculation
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
387 ratio<-chr_dat$AO/(chr_dat$AO+chr_dat$RO)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
388
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
389 #if counter based on adj_AB or ratio
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
390 if(allele_ratio=="ratio"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
391 chr_purealtdat<-subset(chr_dat,ratio>=threshold_upper)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
392 chr_purerefdat<-subset(chr_dat,ratio<=threshold_lower)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
393 chr_hetdat<-subset(chr_dat,ratio>threshold_lower & ratio<threshold_upper)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
394 } else if(allele_ratio=="AB"){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
395 chr_purealtdat<-subset(chr_dat,adj_AB>=threshold_upper)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
396 chr_purerefdat<-subset(chr_dat,adj_AB<=threshold_lower)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
397 chr_hetdat<-subset(chr_dat,adj_AB>threshold_lower & adj_AB<threshold_upper)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
398 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
399 #if chromosome with data, count number of snps within each bin (positions rounded up to nearest bin), else skip to next chromosome
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
400 if(dim(chr_dat)[1]>0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
401 for(i in 1:dim(chr_dat)[1]){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
402 chr_datind<-chr_dat[i,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
403 #round up to nearest bin-size interval
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
404 chr_datind_upper<-ceiling(chr_datind$POS/bin_size)*bin_size
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
405 interval_coln<-NULL;interval_rown<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
406 #identify row and and counter column to increment
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
407 interval_coln<-which(names(interval_index)=="snp_counter")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
408 interval_rown<-match(chr_datind_upper,interval_index$interval_end)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
409 interval_index[interval_rown,interval_coln]<-c(interval_index$snp_counter[interval_rown]+1)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
410 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
411 }else{
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
412 next
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
413 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
414 #if chromosome with pure AO, count number of snps with each bin (positions rounded up to nearest bin)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
415 if(dim(chr_purealtdat)[1]>0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
416 for(i in 1:dim(chr_purealtdat)[1]){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
417 chr_purealtind<-chr_purealtdat[i,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
418 chr_purealtind_upper<-ceiling(chr_purealtind$POS/bin_size)*bin_size
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
419 interval_coln<-NULL;interval_rown<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
420 interval_coln<-which(names(interval_index)=="purealt_counter")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
421 interval_rown<-match(chr_purealtind_upper,interval_index$interval_end)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
422 interval_index[interval_rown,interval_coln]<-c(interval_index$purealt_counter[interval_rown]+1)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
423 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
424 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
425 #if chromosome with pure RO, count number of snps with each bin (positions rounded up to nearest bin)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
426 if(dim(chr_purerefdat)[1]>0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
427 for(i in 1:dim(chr_purerefdat)[1]){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
428 chr_purerefind<-chr_purerefdat[i,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
429 chr_purerefind_upper<-ceiling(chr_purerefind$POS/bin_size)*bin_size
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
430 interval_coln<-NULL;interval_rown<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
431 interval_coln<-which(names(interval_index)=="pureref_counter")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
432 interval_rown<-match(chr_purerefind_upper,interval_index$interval_end)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
433 interval_index[interval_rown,interval_coln]<-c(interval_index$pureref_counter[interval_rown]+1)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
434 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
435 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
436 #if chromosome with hets, count number of snps with each bin (positions rounded up to nearest bin)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
437 if(dim(chr_hetdat)[1]>0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
438 for(i in 1:dim(chr_hetdat)[1]){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
439 chr_hetind<-chr_hetdat[i,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
440 chr_hetind_upper<-ceiling(chr_hetind$POS/bin_size)*bin_size
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
441 interval_coln<-NULL;interval_rown<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
442 interval_coln<-which(names(interval_index)=="het_counter")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
443 interval_rown<-match(chr_hetind_upper,interval_index$interval_end)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
444 interval_index[interval_rown,interval_coln]<-c(interval_index$het_counter[interval_rown]+1)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
445 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
446 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
447 #irrespective of standardized x-axis, mean should be calculated from actual interval range of chromosome
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
448 chr_mean<-sum(interval_index$purealt_counter)/(interval_frame$INTERVAL[interval_frame$CHROM==chromind]*1000000/bin_size)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
449 interval_index$chr_mean<-chr_mean
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
450 meanSNP_dat<-rbind(meanSNP_dat,data.frame(chromind,chr_mean))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
451 #normalization treatment for if SNPs are AO=0, AO=total SNPs, or AO and RO in bin
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
452 for(i in 1:dim(interval_index)[1]){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
453 chr_intind<-interval_index[i,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
454 #hom definition based on specified upper and lower thresholds
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
455 if(chr_intind$purealt_counter<=threshold_lower){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
456 interval_coln<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
457 interval_coln<-which(names(interval_index)=="normed_freq")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
458 interval_index[i,interval_coln]=0
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
459 } else if (chr_intind$purealt_counter==chr_intind$snp_counter){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
460 interval_coln<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
461 interval_coln<-which(names(interval_index)=="normed_freq")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
462 interval_index[i,interval_coln]=(chr_intind$purealt_counter)^2/chr_mean
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
463 } else {
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
464 interval_coln<-NULL
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
465 interval_coln<-which(names(interval_index)=="normed_freq")
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
466 interval_index[i,interval_coln]=chr_mean*(chr_intind$purealt_counter)^2/(chr_intind$snp_counter-chr_intind$purealt_counter)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
467 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
468 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
469 location_index<-rbind(location_index,interval_index)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
470 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
471
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
472 for(chromind in interval_frame$CHROM){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
473 interval_index<-location_index[location_index$chromind==chromind,]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
474 #assign 0 values to avoid empty datatable error
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
475 if(dim(interval_index)[1]==0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
476 interval_index[1,]<-rep(0,dim(interval_index)[2])
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
477 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
478 #set up x_axis
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
479 if(xaxis_standard==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
480 #for standardized x-axis (max x-axis chromosome length)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
481 bpupper_xaxis<-max(interval_frame$INTERVAL)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
482 bpupper_xval<-bpupper_xaxis*interval_unit
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
483 }else if(xaxis_standard==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
484 bpupper_xaxis<-intervalind
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
485 bpupper_xval<-intervalind*interval_unit
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
486 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
487 #set up y_axis range for barplots
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
488 if(bfreq_norm==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
489 bp_yaxis<-5*ceiling(max(location_index$normed_freq)/5)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
490 #assign non-0 value to yaxis to avoid error
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
491 if(bp_yaxis==0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
492 bp_yaxis<-10
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
493 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
494 if(xaxis_standard==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
495 bplot<-barplot(interval_index$normed_freq,space=0,ylim=c(0,bp_yaxis),main=paste("Chr",chromind," Variant Only",sep=""),xlab="Position along Chromosome (in Mb)",ylab='Normalised Frequency')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
496 }else if(xaxis_standard==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
497 bplot<-barplot(interval_index$normed_freq,space=0,xlim=c(0,bpupper_xaxis),ylim=c(0,bp_yaxis),main=paste("Chr",chromind," Variant Only",sep=""),xlab="Position along Chromosome (in Mb)",ylab='Normalised Frequency')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
498 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
499
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
500 }else if(bfreq_norm==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
501 bp_yaxis<-5*ceiling(max(location_index$purealt_counter)/5)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
502 #assign non-0 value to yaxis to avoid error
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
503 if(bp_yaxis==0){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
504 bp_yaxis<-10
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
505 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
506 if(xaxis_standard==TRUE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
507 bplot<-barplot(interval_index$purealt_counter,space=0,ylim=c(0,bp_yaxis),main=paste("Chr",chromind," Variant Only",sep=""),xlab="Position along Chromosome (in Mb)",ylab='Frequency')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
508 }else if(xaxis_standard==FALSE){
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
509 bplot<-barplot(interval_index$purealt_counter,space=0,xlim=c(0,bpupper_xaxis),ylim=c(0,bp_yaxis),main=paste("Chr",chromind," Variant Only",sep=""),xlab="Position along Chromosome (in Mb)",ylab='Frequency')
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
510 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
511 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
512 bp_xaxis1<-as.numeric(bplot)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
513 bp_xaxis2<-c(bp_xaxis1,tail(bp_xaxis1,1)+bp_xaxis1[2]-bp_xaxis1[1])
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
514 bp_xaxis<-bp_xaxis2-bp_xaxis1[1]
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
515
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
516 axis(1,at=bp_xaxis,labels=seq(0,bpupper_xaxis,by=c(bin_size/1000000)))
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
517 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
518 dev.off()
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
519 }
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
520
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
521 myfunction(inf=options$inf,itype=options$itype,qual=options$qual,allr=options$allr,snp=options$snp,lsp=options$lsp,pcol=options$pcol,lcol=options$lcol,
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
522 xstand=options$xstand,bsize=options$bsize,bnorm=options$bnorm,freqthr=options$freqthr,exclf=options$exclf,exclcol=options$exclcol,outn=options$outn,pdfn=options$pdfn)
472ea11c2b25 Uploaded
fxce
parents:
diff changeset
523