# Finding outliers by standard deviation

# Collect the samples flagged as outliers on the first OR the second
# principal component.
#
# Arguments:
#   pca_data  object whose 'values' element is indexed by column; columns 1
#             and 2 are read (presumably the PC1 / PC2 scores, one row per
#             sample -- confirm against the caller)
#   xsamples  not used by this function; kept so existing calls still work
#   numsds    number of standard deviations from the median beyond which a
#             sample counts as an outlier (passed through to find_outliers)
#
# Returns the de-duplicated positions reported by find_outliers() for
# either column: column-1 hits first, then any additional column-2 hits.
outliers_by_sd = function(pca_data, xsamples, numsds) {
  scores = pca_data$values
  flagged_pc1 = find_outliers(scores[, 1], numsds)
  flagged_pc2 = find_outliers(scores[, 2], numsds)

  # union() removes positions that were flagged on both components.
  union(flagged_pc1, flagged_pc2)
}
 | 
| 
 | 
    11 
 | 
# Locate values that lie far from the median, measured in standard deviations.
#
# Arguments:
#   input_data  numeric vector to scan
#   numsds      number of standard deviations allowed on either side of the
#               median before a value is flagged
#
# Returns an integer vector of positions in 'input_data' whose values fall
# strictly below median - numsds*sd or strictly above median + numsds*sd.
#
# Missing values are excluded from the median / sd estimates and are never
# flagged themselves. Without na.rm = TRUE a single NA would turn both
# bounds into NA and silently suppress every outlier. When the standard
# deviation is undefined (fewer than two non-missing values) the result is
# empty.
find_outliers = function(input_data, numsds) {
  centre = median(input_data, na.rm = TRUE)
  # Half-width of the accepted band; computed once and reused for both bounds.
  halfwidth = numsds * sd(input_data, na.rm = TRUE)
  lower = centre - halfwidth
  upper = centre + halfwidth

  # which() ignores NA comparisons, so missing values never reach the result.
  which(input_data < lower | input_data > upper)
}