comparison coral_multilocus_genotype.R @ 7:bcb28b49b0cc draft

Uploaded
author greg
date Tue, 12 Jan 2021 15:19:58 +0000
parents 0488ca4a8c4e
children 33d759858625
comparison
equal deleted inserted replaced
6:0488ca4a8c4e 7:bcb28b49b0cc
112 genind_clone <- as.genclone(genind_obj); 112 genind_clone <- as.genclone(genind_obj);
113 cat("\ngenind_clone:\n"); 113 cat("\ngenind_clone:\n");
114 genind_clone 114 genind_clone
115 cat("\n\n"); 115 cat("\n\n");
116 time_elapsed(start_time); 116 time_elapsed(start_time);
117 # Remove genind object from memory.
118 rm(genind_obj);
117 119
118 # Calculate the bitwise distance between individuals. 120 # Calculate the bitwise distance between individuals.
119 start_time <- time_start("Calculating the bitwise distance between individuals"); 121 start_time <- time_start("Calculating the bitwise distance between individuals");
120 bitwise_distance <- bitwise.dist(genind_clone); 122 bitwise_distance <- bitwise.dist(genind_clone);
121 time_elapsed(start_time); 123 time_elapsed(start_time);
407 cat("\nCreating id_data_table...\n"); 409 cat("\nCreating id_data_table...\n");
408 id_data_table <- data.table(id_rep, keep.rownames=TRUE); 410 id_data_table <- data.table(id_rep, keep.rownames=TRUE);
409 # Rename the id_rep column. 411 # Rename the id_rep column.
410 setnames(id_data_table, c("id_rep"), c("affy_id")); 412 setnames(id_data_table, c("id_rep"), c("affy_id"));
411 time_elapsed(start_time); 413 time_elapsed(start_time);
414 # Remove clonecorrect genind from memory.
415 rm(no_dup_genotypes_genind);
412 416
413 # Table of alleles for the new samples subset to new plate data. 417 # Table of alleles for the new samples subset to new plate data.
414 # Create vector indicating number of individuals desired from 418 # Create vector indicating number of individuals desired from
415 # affy_id column of stag_db_report data table. 419 # affy_id column of stag_db_report data table.
416 i <- ifelse(is.na(stag_db_report[3]), "", stag_db_report[[3]]); 420 i <- ifelse(is.na(stag_db_report[3]), "", stag_db_report[[3]]);
419 # Subset VCF to the user samples. 423 # Subset VCF to the user samples.
420 start_time <- time_start("Subsetting vcf to the user samples"); 424 start_time <- time_start("Subsetting vcf to the user samples");
421 affy_list <- append(stag_db_report$affy_id,"FORMAT"); 425 affy_list <- append(stag_db_report$affy_id,"FORMAT");
422 svcf <- vcf[,colnames(vcf@gt) %in% affy_list]; 426 svcf <- vcf[,colnames(vcf@gt) %in% affy_list];
423 write.vcf(svcf, "subset.vcf.gz"); 427 write.vcf(svcf, "subset.vcf.gz");
428
429 # Remove original and subset VCFs written to file from R memory.
430 rm(svcf);
431 rm(vcf);
432
433 # Load in subset VCF.
424 vcf.fn <- "subset.vcf.gz"; 434 vcf.fn <- "subset.vcf.gz";
425 snpgdsVCF2GDS(vcf.fn, "test3.gds", method="biallelic.only"); 435 snpgdsVCF2GDS(vcf.fn, "test3.gds", method="biallelic.only");
426 genofile <- snpgdsOpen(filename="test3.gds", readonly=FALSE); 436 genofile <- snpgdsOpen(filename="test3.gds", readonly=FALSE);
427 gds_array <- read.gdsn(index.gdsn(genofile, "sample.id")); 437 gds_array <- read.gdsn(index.gdsn(genofile, "sample.id"));
428 # gds_array looks like this: 438 # gds_array looks like this:
893 mutate(species_name = ifelse(genetic_coral_species_call == "A.cervicornis", "cervicornis", species_name)) %>% 903 mutate(species_name = ifelse(genetic_coral_species_call == "A.cervicornis", "cervicornis", species_name)) %>%
894 mutate(species_name = ifelse(genetic_coral_species_call == "A.prolifera", "prolifera", species_name)); 904 mutate(species_name = ifelse(genetic_coral_species_call == "A.prolifera", "prolifera", species_name));
895 colnames(taxonomy_table_data_frame) <- c("genetic_coral_species_call", "affy_id", "genus_name", "species_name"); 905 colnames(taxonomy_table_data_frame) <- c("genetic_coral_species_call", "affy_id", "genus_name", "species_name");
896 write_data_frame(output_data_dir, "taxonomy.tabular", taxonomy_table_data_frame); 906 write_data_frame(output_data_dir, "taxonomy.tabular", taxonomy_table_data_frame);
897 time_elapsed(start_time); 907 time_elapsed(start_time);
898