annotate Rscripts/ridb-regression.R @ 36:9018657a9f73

Added "Membership cutoff (0.0 - 1.0)" option
author pieter.lukasse@wur.nl
date Fri, 19 Sep 2014 11:25:09 +0200
parents 9d5f4f5f764b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
1 ##
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
2 #
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
3 # Performs regression analysis using either 3rd degree polynomial- or linear-method
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
4 #
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
5 ##
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
6
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
7 # Commandline arguments
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
8 args <- commandArgs(TRUE)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
9 if (length(args) < 7)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
10 stop(cat("Missing arguments, usage:\n\tRscript ridb-regression.R RI-database ",
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
11 "ouput_file logfile min_residuals range_mod pvalue rsquared method ",
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
12 "plot(yes/no) plot_archive"))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
13
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
14 ridb <- args[1]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
15 out_file <- args[2]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
16 logfile <- args[3]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
17 min_residuals <- as.integer(args[4])
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
18 range_mod <- as.integer(args[5])
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
19 pvalue <- as.double(args[6])
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
20 rsquared <- as.double(args[7])
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
21 method <- args[8]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
22 plot <- tolower(args[9])
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
23 if (plot == 'true')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
24 plot_archive = args[10]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
25
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
26 # Do not show warnings etc.
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
27 sink(file='/dev/null')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
28
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
29 progress <- c()
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
30 logger <- function(logdata) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
31 ## Logs progress, adds a timestamp for each event
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
32 #cat(paste(Sys.time(), "\t", logdata, "\n", sep="")) ## DEBUG
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
33 progress <<- c(progress, paste(Sys.time(), "\t", logdata, sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
34 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
35
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
36 logger("Reading Retention Index Database..")
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
37
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
38 # Read Retention Index Database
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
39 ridb <- read.csv(ridb, header=TRUE, sep="\t")
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
40 logger(paste("\t", nrow(ridb), "records read.."))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
41 # Get a unique list
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
42 gc_columns <- unique(as.vector(as.matrix(ridb['Column.name'])[,1]))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
43 cas_numbers <- unique(as.vector(as.matrix(ridb['CAS'])[,1]))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
44
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
45 add_poly_fit <- function(fit, gc1_index, gc2_index, range) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
46 pval = anova.lm(fit)$Pr
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
47 r.squared = summary(fit)$r.squared
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
48
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
49 data = rep(NA, 11)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
50 # Append results to matrix
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
51 data[1] = gc_columns[gc1_index] # Column 1
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
52 data[2] = gc_columns[gc2_index] # Column 2
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
53 data[3] = coefficients(fit)[1] # The 4 coefficients
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
54 data[4] = coefficients(fit)[2]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
55 data[5] = coefficients(fit)[3]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
56 data[6] = coefficients(fit)[4]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
57 data[7] = range[1] # Left limit
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
58 data[8] = range[2] # Right limit
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
59 data[9] = length(fit$residuals) # Number of datapoints analysed
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
60 data[10] = pval[1] # p-value for resulting fitting
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
61 data[11] = r.squared # R-squared
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
62 return(data)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
63 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
64
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
65
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
66 add_linear_fit <- function(fit, gc1_index, gc2_index, range) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
67 pval = anova.lm(fit)$Pr
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
68 r.squared = summary(fit)$r.squared
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
69
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
70 data = rep(NA, 7)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
71 # Append results to matrix
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
72 data[1] = gc_columns[gc1_index] # Column 1
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
73 data[2] = gc_columns[gc2_index] # Column 2
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
74 data[3] = coefficients(fit)[1] # The 4 coefficients
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
75 data[4] = coefficients(fit)[2]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
76 data[7] = length(fit$residuals) # Number of datapoints analysed
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
77 data[8] = pval[1] # p-value for resulting fitting
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
78 data[9] = r.squared # R-squared
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
79 return(data)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
80 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
81
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
82
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
83 add_fit <- function(fit, gc1_index, gc2_index, range, method) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
84 if (method == 'poly')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
85 return(add_poly_fit(fit, gc1_index, gc2_index, range))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
86 else
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
87 return(add_linear_fit(fit, gc1_index, gc2_index, range))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
88 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
89
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
90
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
91 plot_fit <- function(ri1, ri2, gc1_index, gc2_index, coeff, range, method) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
92 if (method == 'poly')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
93 pol <- function(x) coeff[4]*x^3 + coeff[3]*x^2 + coeff[2]*x + coeff[1]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
94 else
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
95 pol <- function(x) coeff[2]*x + coeff[1]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
96 pdf(paste('regression_model_',
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
97 make.names(gc_columns[gc1_index]), '_vs_',
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
98 make.names(gc_columns[gc2_index]), '.pdf', sep=''))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
99 curve(pol, 250:3750, col="red", lwd=2.5, main='Regression Model', xlab=gc_columns[gc1_index],
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
100 ylab=gc_columns[gc2_index], xlim=c(250, 3750), ylim=c(250, 3750))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
101 points(ri1, ri2, lwd=0.4)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
102 # Add vertical lines showing left- and right limits when using poly method
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
103 if (method == 'poly')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
104 abline(v=range, col="grey", lwd=1.5)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
105 dev.off()
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
106 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
107
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
108 # Initialize output dataframe
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
109 if (method == 'poly') {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
110 m <- data.frame(matrix(ncol = 11, nrow = 10))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
111 } else {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
112 m <- data.frame(matrix(ncol = 9, nrow = 10))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
113 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
114
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
115
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
116 get_fit <- function(gc1, gc2, method) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
117 if (method == 'poly')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
118 return(lm(gc1 ~ poly(gc2, 3, raw=TRUE)))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
119 else
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
120 return(lm(gc1 ~ gc2))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
121 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
122
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
123 # Permutate
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
124 k <- 1
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
125 logger(paste("Permutating (with ", length(gc_columns), " GC-columns)..", sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
126
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
127 for (i in 1:(length(gc_columns)-1)) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
128 logger(paste("\tCalculating model for ", gc_columns[i], "..", sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
129 breaks <- 0
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
130 for (j in (i+1):length(gc_columns)) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
131 col1 = ridb[which(ridb['Column.name'][,1] == gc_columns[i]),]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
132 col2 = ridb[which(ridb['Column.name'][,1] == gc_columns[j]),]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
133
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
134 # Find CAS numbers for which both columns have data (intersect)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
135 cas_intersect = intersect(col1[['CAS']], col2[['CAS']])
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
136
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
137 # Skip if number of shared CAS entries is < cutoff
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
138 if (length(cas_intersect) < min_residuals) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
139 breaks = breaks + 1
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
140 next
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
141 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
142 # Gather Retention Indices
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
143 col1_data = col1[['RI']][match(cas_intersect, col1[['CAS']])]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
144 col2_data = col2[['RI']][match(cas_intersect, col2[['CAS']])]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
145
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
146 # Calculate the range within which regression is possible (and move if 'range_mod' != 0)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
147 range = c(min(c(min(col1_data), min(col2_data))), max(c(max(col1_data), max(col2_data))))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
148 if (range_mod != 0) {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
149 # Calculate percentage and add/subtract from range
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
150 perc = diff(range) / 100
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
151 perc_cutoff = range_mod * perc
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
152 range = as.integer(range + c(perc_cutoff, -perc_cutoff))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
153 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
154
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
155 # Calculate model for column1 vs column2 and plot if requested
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
156 fit = get_fit(col1_data, col2_data, method)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
157 m[k,] = add_fit(fit, i, j, range, method)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
158
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
159 if (plot == 'true')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
160 plot_fit(col1_data, col2_data, i, j, coefficients(fit), range, method)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
161
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
162 # Calculate model for column2 vs column1 and plot if requested
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
163 fit = get_fit(col2_data, col1_data, method)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
164 m[k + 1,] = add_fit(fit, j, i, range, method)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
165
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
166 if (plot == 'true')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
167 plot_fit(col2_data, col1_data, j, i, coefficients(fit), range, method)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
168
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
169 k = k + 2
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
170 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
171 logger(paste("\t\t", breaks, " comparisons have been skipped due to nr. of datapoints < cutoff", sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
172 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
173
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
174 # Filter on pvalue and R-squared
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
175 logger("Filtering on pvalue and R-squared..")
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
176 if (method == 'poly') {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
177 pval_index <- which(m[,10] < pvalue)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
178 rsquared_index <- which(m[,11] > rsquared)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
179 } else {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
180 pval_index <- which(m[,8] < pvalue)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
181 rsquared_index <- which(m[,9] > rsquared)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
182 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
183 logger(paste(nrow(m) - length(pval_index), " models discarded due to pvalue > ", pvalue, sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
184
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
185 logger(paste(nrow(m) - length(rsquared_index), " models discarded due to R-squared < ", rsquared, sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
186
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
187 # Remaining rows
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
188 index = unique(c(pval_index, rsquared_index))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
189
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
190 # Reduce dataset
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
191 m = m[index,]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
192 sink()
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
193
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
194 # Place plots in the history as a ZIP file
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
195 if (plot == 'true') {
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
196 logger("Creating archive with model graphics..")
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
197 system(paste("zip -9 -r models.zip *.pdf > /dev/null", sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
198 system(paste("cp models.zip ", plot_archive, sep=""))
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
199 }
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
200
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
201 # Save dataframe as tab separated file
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
202 logger("All done, saving data..")
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
203 header = c("Column1", "Column2", "Coefficient1", "Coefficient2", "Coefficient3", "Coefficient4",
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
204 "LeftLimit", "RightLimit", "Residuals", "pvalue", "Rsquared")
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
205 if (method != 'poly')
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
206 header = header[c(1:4, 7:11)]
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
207 write(progress, logfile)
9d5f4f5f764b Initial commit to toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
208 write.table(m, file=out_file, sep="\t", quote=FALSE, col.names=header, row.names=FALSE)