Mercurial > repos > jason-ellul > iterativepca
comparison R_functions/outlier_trimming.R @ 0:cb54350e76ae draft default tip
Uploaded
| author | jason-ellul |
|---|---|
| date | Wed, 01 Jun 2016 03:24:56 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cb54350e76ae |
|---|---|
| 1 # Finding outliers by standard deviation | |
| 2 | |
# Collect the samples that are outliers on the first OR the second
# principal component.
#
# pca_data: object whose `values` element is indexed by column; column 1
#           is treated as PC1 and column 2 as PC2.
# xsamples: accepted for interface compatibility; not used in this function.
# numsds:   number of standard deviations from the median beyond which a
#           sample counts as an outlier (forwarded to find_outliers).
#
# Returns the union of the indices flagged on either component, with
# duplicates removed.
outliers_by_sd <- function(pca_data, xsamples, numsds) {
  scores <- pca_data$values
  flagged_pc1 <- find_outliers(scores[, 1], numsds)
  flagged_pc2 <- find_outliers(scores[, 2], numsds)
  union(flagged_pc1, flagged_pc2)
}
| 11 | |
# Find the positions of values lying more than `numsds` standard
# deviations away from the median.
#
# input_data: numeric vector of values to screen.
# numsds:     width of the accepted band, in standard deviations, on each
#             side of the median.
#
# Returns an integer vector of indices into `input_data` (empty when nothing
# is flagged). Missing values are ignored when computing the median and the
# standard deviation, and are never reported as outliers themselves; without
# this, a single NA would turn both bounds into NA and silently hide every
# outlier.
find_outliers <- function(input_data, numsds) {
  centre <- median(input_data, na.rm = TRUE)
  # Compute the spread once; it is shared by the lower and upper bound.
  spread <- numsds * sd(input_data, na.rm = TRUE)
  lower <- centre - spread
  upper <- centre + spread

  # which() drops NA comparisons, so missing entries are never flagged.
  which(input_data < lower | input_data > upper)
}
