0
|
1 # Finding outliers by standard deviation
|
|
2
|
|
# Flag samples that are outliers on either of the first two components.
#
# Arguments:
#   pca_data: object with a `values` element that can be indexed by column;
#             columns 1 and 2 are screened (the pc1 and pc2 values).
#   xsamples: not used by this function; kept in the signature so existing
#             callers keep working.
#   numsds:   number of standard deviations away from the column median
#             beyond which a sample is treated as an outlier.
#
# Returns the indices reported by find_outliers() for column 1 OR column 2,
# with duplicates removed (column 1 hits first, then column 2-only hits).
outliers_by_sd <- function(pca_data, xsamples, numsds) {
  scores <- pca_data$values

  # Screen each of the two components independently.
  flagged_per_pc <- lapply(
    seq_len(2L),
    function(pc) find_outliers(scores[, pc], numsds)
  )

  # A sample counts as an outlier if it is flagged on either component.
  Reduce(union, flagged_per_pc)
}
|
|
11
|
|
# Find the positions of values that lie far from the sample median.
#
# Arguments:
#   input_data: numeric vector of values to screen.
#   numsds:     number of standard deviations away from the median beyond
#               which a value is treated as an outlier.
#
# Returns an integer vector of indices into `input_data` whose values are
# strictly below `median - numsds * sd` or strictly above
# `median + numsds * sd`. The vector is empty when nothing qualifies.
#
# Missing values: the median and standard deviation are computed with
# `na.rm = TRUE`, so NA entries no longer turn both bounds into NA (which
# silently produced "no outliers"). NA entries are never reported as
# outliers themselves, and the returned indices still refer to positions in
# the original, unfiltered `input_data`. With fewer than two non-missing
# values sd() is NA and the result is empty.
find_outliers <- function(input_data, numsds) {
  centre <- median(input_data, na.rm = TRUE)
  spread <- numsds * sd(input_data, na.rm = TRUE)

  # which() drops NA comparisons, so only definite outliers are returned.
  which(input_data < centre - spread | input_data > centre + spread)
}