view iterative_pca_plot.R @ 0:64e75e21466e draft default tip

Uploaded
author pmac
date Wed, 01 Jun 2016 03:38:39 -0400
parents
children
line wrap: on
line source

library(flashpcaR)
library(dbscan)
library(cluster)

## MAIN ###
# get command line arguments
CLI_FLAG = 1
if (CLI_FLAG == 1) {
  n = commandArgs(trailingOnly=TRUE)[1]
  outdir = commandArgs(trailingOnly=TRUE)[2]
  basename = commandArgs(trailingOnly=TRUE)[3]
  data_source = commandArgs(trailingOnly=TRUE)[4]
  data_type = commandArgs(trailingOnly=TRUE)[5]
  eth_filename = commandArgs(trailingOnly=TRUE)[6]
  control_tag = commandArgs(trailingOnly=TRUE)[7]
  if (control_tag == "None") {control_tag = NULL}
  cases_tag = commandArgs(trailingOnly=TRUE)[8]
  if (cases_tag == "None") {cases_tag = NULL}
  numsds = as.numeric(commandArgs(trailingOnly=TRUE)[9])
  cmethod = commandArgs(trailingOnly=TRUE)[10]
  tmethod = commandArgs(trailingOnly=TRUE)[11]
  path_to_r_functions = commandArgs(trailingOnly=TRUE)[12]
  xsamples_filename = commandArgs(trailingOnly=TRUE)[13]
  xsnps_filename = commandArgs(trailingOnly=TRUE)[14]
} else {
  n = 10
  basename = "test_eth2"
  data_source = "./data/halo1_numeric.ped"
  data_type = "numeric_ped"
  outdir = paste0(getwd(), "/full_output_", basename)
  #data_source = "./data/HapMap3_flashPCA_data.rds"
  #data_type = "rds"
  #eth_filename = "./data/HapMap3_ethnicity_rf.txt"
  eth_filename = "./data/Halo_ethnicity_rf.txt"
  control_tag = "HAPS"
  cases_tag = NULL
  numsds = 1.1
  cmethod = "hclust"
  tmethod = "mcd"
  path_to_r_functions = paste0(getwd(), "/R_functions/")
  xsamples_filename = "./xfiles/halo1_xsamples.txt"
  xsnps_filename = "./xfiles/halo1_xsnps.txt"
}

# get source code
source(paste0(path_to_r_functions, "/", "plotting_functions.R"))
source(paste0(path_to_r_functions, "/", "pca_helpers.R"))
source(paste0(path_to_r_functions, "/", "pipeline_code.R"))
source(paste0(path_to_r_functions, "/", "clustering_functions.R"))
source(paste0(path_to_r_functions, "/", "outlier_trimming.R"))

if (CLI_FLAG != 1) {
  unlink(paste0(getwd(), "/", "full_output_", basename), recursive=TRUE)
}

# read in data
ped_data = get_source_data(data_source, data_type)
eth_data = parse_ethnicity_file(eth_filename)
xsamples = get_first_column(xsamples_filename)
xsnps = get_first_column(xsnps_filename)

# do the pca and prepare plots
iterations = list()
for(i in 1:n) {
  fpd = filter_ped_data(ped_data, xsamples, xsnps)
  iterations[[i]] = single_iteration(outdir, basename, fpd, xsamples, numsds, 
                                     cmethod, tmethod, control_tag, cases_tag, ethnicity_data=eth_data)
  iterations[[i]]$dirname = generate_directory_name(outdir, basename, i)
  xsamples = iterations[[i]]$xsamples
}

# create folders and plots
for (i in 1:n) {
  titer = iterations[[i]]
  dir.create(titer$dirname, recursive=TRUE)
  num_plots = titer$num_plots
  
  for (j in 1:num_plots) {
    plot_filename = sprintf("%s/%s_plot_number_%d.png", titer$dirname, basename, j)
    plot_by_groups(titer$pca_data$values[, c(1, 2)], 
                  titer$plots[[j]]$groups, 
                  titer$plots[[j]]$tags, 
                  titer$plots[[j]]$plot_colors, 
                  titer$plots[[j]]$plot_symbols, 
                  titer$plots[[j]]$plot_title,
                  plot_filename=plot_filename
    )
  }
  
  # write outliers to file  
  xfilename = paste0(titer$dirname, "/", basename, "_xfile.txt")
  outliers_filename = paste0(titer$dirname, "/", basename, "_outliers.txt")
  xscon = add_ethnicity_data(titer$old_xsamples, eth_data)
  olcon = add_ethnicity_data(titer$outliers, eth_data)
  write.table(xscon, file=xfilename, row.names=FALSE, col.names=TRUE, sep=",")
  write.table(olcon, file=outliers_filename, row.names=FALSE, col.names=TRUE, sep=",")
}