Mercurial > repos > jason-ellul > iterativepca
comparison iterative_pca_plot.R @ 0:cb54350e76ae draft default tip
Uploaded
| author | jason-ellul |
|---|---|
| date | Wed, 01 Jun 2016 03:24:56 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cb54350e76ae |
|---|---|
| 1 library(flashpcaR) | |
| 2 library(dbscan) | |
| 3 library(cluster) | |
| 4 | |
| 5 ## MAIN ### | |
| 6 # get command line arguments | |
| 7 CLI_FLAG = 1 | |
| 8 if (CLI_FLAG == 1) { | |
| 9 n = commandArgs(trailingOnly=TRUE)[1] | |
| 10 outdir = commandArgs(trailingOnly=TRUE)[2] | |
| 11 basename = commandArgs(trailingOnly=TRUE)[3] | |
| 12 data_source = commandArgs(trailingOnly=TRUE)[4] | |
| 13 data_type = commandArgs(trailingOnly=TRUE)[5] | |
| 14 eth_filename = commandArgs(trailingOnly=TRUE)[6] | |
| 15 control_tag = commandArgs(trailingOnly=TRUE)[7] | |
| 16 if (control_tag == "None") {control_tag = NULL} | |
| 17 cases_tag = commandArgs(trailingOnly=TRUE)[8] | |
| 18 if (cases_tag == "None") {cases_tag = NULL} | |
| 19 numsds = as.numeric(commandArgs(trailingOnly=TRUE)[9]) | |
| 20 cmethod = commandArgs(trailingOnly=TRUE)[10] | |
| 21 tmethod = commandArgs(trailingOnly=TRUE)[11] | |
| 22 path_to_r_functions = commandArgs(trailingOnly=TRUE)[12] | |
| 23 xsamples_filename = commandArgs(trailingOnly=TRUE)[13] | |
| 24 xsnps_filename = commandArgs(trailingOnly=TRUE)[14] | |
| 25 } else { | |
| 26 n = 10 | |
| 27 basename = "test_eth2" | |
| 28 data_source = "./data/halo1_numeric.ped" | |
| 29 data_type = "numeric_ped" | |
| 30 outdir = paste0(getwd(), "/full_output_", basename) | |
| 31 #data_source = "./data/HapMap3_flashPCA_data.rds" | |
| 32 #data_type = "rds" | |
| 33 #eth_filename = "./data/HapMap3_ethnicity_rf.txt" | |
| 34 eth_filename = "./data/Halo_ethnicity_rf.txt" | |
| 35 control_tag = "HAPS" | |
| 36 cases_tag = NULL | |
| 37 numsds = 1.1 | |
| 38 cmethod = "hclust" | |
| 39 tmethod = "mcd" | |
| 40 path_to_r_functions = paste0(getwd(), "/R_functions/") | |
| 41 xsamples_filename = "./xfiles/halo1_xsamples.txt" | |
| 42 xsnps_filename = "./xfiles/halo1_xsnps.txt" | |
| 43 } | |
| 44 | |
| 45 # get source code | |
| 46 source(paste0(path_to_r_functions, "/", "plotting_functions.R")) | |
| 47 source(paste0(path_to_r_functions, "/", "pca_helpers.R")) | |
| 48 source(paste0(path_to_r_functions, "/", "pipeline_code.R")) | |
| 49 source(paste0(path_to_r_functions, "/", "clustering_functions.R")) | |
| 50 source(paste0(path_to_r_functions, "/", "outlier_trimming.R")) | |
| 51 | |
| 52 if (CLI_FLAG != 1) { | |
| 53 unlink(paste0(getwd(), "/", "full_output_", basename), recursive=TRUE) | |
| 54 } | |
| 55 | |
| 56 # read in data | |
| 57 ped_data = get_source_data(data_source, data_type) | |
| 58 eth_data = parse_ethnicity_file(eth_filename) | |
| 59 xsamples = get_first_column(xsamples_filename) | |
| 60 xsnps = get_first_column(xsnps_filename) | |
| 61 | |
| 62 # do the pca and prepare plots | |
| 63 iterations = list() | |
| 64 for(i in 1:n) { | |
| 65 fpd = filter_ped_data(ped_data, xsamples, xsnps) | |
| 66 iterations[[i]] = single_iteration(outdir, basename, fpd, xsamples, numsds, | |
| 67 cmethod, tmethod, control_tag, cases_tag, ethnicity_data=eth_data) | |
| 68 iterations[[i]]$dirname = generate_directory_name(outdir, basename, i) | |
| 69 xsamples = iterations[[i]]$xsamples | |
| 70 } | |
| 71 | |
| 72 # create folders and plots | |
| 73 for (i in 1:n) { | |
| 74 titer = iterations[[i]] | |
| 75 dir.create(titer$dirname, recursive=TRUE) | |
| 76 num_plots = titer$num_plots | |
| 77 | |
| 78 for (j in 1:num_plots) { | |
| 79 plot_filename = sprintf("%s/%s_plot_number_%d.png", titer$dirname, basename, j) | |
| 80 plot_by_groups(titer$pca_data$values[, c(1, 2)], | |
| 81 titer$plots[[j]]$groups, | |
| 82 titer$plots[[j]]$tags, | |
| 83 titer$plots[[j]]$plot_colors, | |
| 84 titer$plots[[j]]$plot_symbols, | |
| 85 titer$plots[[j]]$plot_title, | |
| 86 plot_filename=plot_filename | |
| 87 ) | |
| 88 } | |
| 89 | |
| 90 # write outliers to file | |
| 91 xfilename = paste0(titer$dirname, "/", basename, "_xfile.txt") | |
| 92 outliers_filename = paste0(titer$dirname, "/", basename, "_outliers.txt") | |
| 93 xscon = add_ethnicity_data(titer$old_xsamples, eth_data) | |
| 94 olcon = add_ethnicity_data(titer$outliers, eth_data) | |
| 95 write.table(xscon, file=xfilename, row.names=FALSE, col.names=TRUE, sep=",") | |
| 96 write.table(olcon, file=outliers_filename, row.names=FALSE, col.names=TRUE, sep=",") | |
| 97 } |
