Mercurial > repos > greg > coral_multilocus_genotype
comparison coral_multilocus_genotype.R @ 8:33d759858625 draft
Uploaded
author | greg |
---|---|
date | Thu, 15 Jul 2021 20:06:14 +0000 |
parents | bcb28b49b0cc |
children | 3c42de11ea1d |
comparison
equal
deleted
inserted
replaced
7:bcb28b49b0cc | 8:33d759858625 |
---|---|
127 mlg.filter(genind_clone, distance=bitwise_distance) <- 0.032; | 127 mlg.filter(genind_clone, distance=bitwise_distance) <- 0.032; |
128 | 128 |
129 # Create list of MLGs. | 129 # Create list of MLGs. |
130 cat("\nCreating list of mlg_ids...\n\n"); | 130 cat("\nCreating list of mlg_ids...\n\n"); |
131 mlg_ids <- mlg.id(genind_clone); | 131 mlg_ids <- mlg.id(genind_clone); |
132 cat("\nCreated list of mlg_ids...\n\n"); | |
132 | 133 |
133 # Read user's Affymetrix 96 well plate tabular file. | 134 # Read user's Affymetrix 96 well plate tabular file. |
135 cat("\nCreating affy_metadata_data_frame...\n\n"); | |
134 affy_metadata_data_frame <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings=c("", "NA"), quote=""); | 136 affy_metadata_data_frame <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings=c("", "NA"), quote=""); |
137 cat("\nCreated affy_metadata_data_frame...\n\n"); | |
135 colnames(affy_metadata_data_frame) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef", | 138 colnames(affy_metadata_data_frame) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef", |
136 "region", "latitude", "longitude", "geographic_origin", "colony_location", | 139 "region", "latitude", "longitude", "geographic_origin", "colony_location", |
137 "depth", "disease_resist", "bleach_resist", "mortality","tle", | 140 "depth", "disease_resist", "bleach_resist", "mortality","tle", |
138 "spawning", "collector_last_name", "collector_first_name", "organization", "collection_date", | 141 "spawning", "collector_last_name", "collector_first_name", "organization", "collection_date", |
139 "email", "seq_facility", "array_version", "public", "public_after_date", | 142 "email", "seq_facility", "array_version", "public", "public_after_date", |
317 group_by(row_number()) %>% | 320 group_by(row_number()) %>% |
318 dplyr::rename(group="row_number()") %>% | 321 dplyr::rename(group="row_number()") %>% |
319 unnest (affy_id) %>% | 322 unnest (affy_id) %>% |
320 # Join with mlg table. | 323 # Join with mlg table. |
321 left_join(smlg_data_frame %>% | 324 left_join(smlg_data_frame %>% |
322 select("affy_id","coral_mlg_clonal_id", "coral_mlg_rep_sample_id"), | 325 select("affy_id","coral_mlg_clonal_id", "coral_mlg_rep_sample_id", |
326 "genetic_coral_species_call", "bcoral_genet_id"), | |
323 by="affy_id"); | 327 by="affy_id"); |
324 | 328 |
325 # If found in database, group members on previous mlg id. | 329 # If found in database, group members on previous mlg id. |
326 uniques <- unique(sample_mlg_tibble[c("group", "coral_mlg_clonal_id")]); | 330 uniques <- unique(sample_mlg_tibble[c("group", "coral_mlg_clonal_id")]); |
327 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),]; | 331 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),]; |
760 # A.palmata 1104 | 764 # A.palmata 1104 |
761 prep_genotype_tibble <- id_data_table %>% | 765 prep_genotype_tibble <- id_data_table %>% |
762 group_by(row_number()) %>% | 766 group_by(row_number()) %>% |
763 dplyr::rename(group='row_number()') %>% | 767 dplyr::rename(group='row_number()') %>% |
764 unnest(affy_id) %>% | 768 unnest(affy_id) %>% |
765 left_join(smlg_data_frame %>% | 769 left_join(sample_mlg_match_tibble %>% |
766 select("affy_id", "coral_mlg_rep_sample_id", "coral_mlg_clonal_id", "user_specimen_id", | 770 select("affy_id", "coral_mlg_rep_sample_id", "coral_mlg_clonal_id", |
767 "genetic_coral_species_call", "bcoral_genet_id"), | 771 "genetic_coral_species_call", "bcoral_genet_id", "db_match"), |
768 by='affy_id'); | 772 by='affy_id') %>% |
773 right_join(sample_mlg_match_tibble %>% | |
774 select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"), | |
775 by='coral_mlg_clonal_id') %>% | |
776 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>% | |
777 ungroup() %>% | |
778 dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>% | |
779 distinct(); | |
780 | |
769 # Confirm that the representative mlg is the same between runs. | 781 # Confirm that the representative mlg is the same between runs. |
770 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]); | 782 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]); |
771 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),]; | 783 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),]; |
772 na.mlg3 <- which(is.na(prep_genotype_tibble$coral_mlg_rep_sample_id)); | 784 na.mlg3 <- which(is.na(prep_genotype_tibble$coral_mlg_rep_sample_id)); |
773 na.group2 <- prep_genotype_tibble$group[na.mlg3]; | 785 na.group2 <- prep_genotype_tibble$group[na.mlg3]; |
780 # a100000-… 13905 HG0048 13905 | 792 # a100000-… 13905 HG0048 13905 |
781 # genetic_coral_species_call bcoral_genet_id | 793 # genetic_coral_species_call bcoral_genet_id |
782 # <chr> <chr> | 794 # <chr> <chr> |
783 # A.palmata C1651 | 795 # A.palmata C1651 |
784 representative_mlg_tibble <- prep_genotype_tibble %>% | 796 representative_mlg_tibble <- prep_genotype_tibble %>% |
785 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id), affy_id, coral_mlg_rep_sample_id)) %>% | 797 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>% |
786 ungroup() %>% | 798 ungroup() %>% |
787 select(-group); | 799 select(-group)%>% |
800 distinct(); | |
788 # prep_genotype_table_tibble looks like this: | 801 # prep_genotype_table_tibble looks like this: |
789 # affy_id coral_mlg_clonal_id user_specimen_id db_match | 802 # affy_id coral_mlg_clonal_id user_specimen_id db_match |
790 # a550962...CEL HG0120 1090 match | 803 # a550962...CEL HG0120 1090 match |
791 # genetic_coral_species_call coral_mlg_rep_sample_id | 804 # genetic_coral_species_call coral_mlg_rep_sample_id |
792 # A.palmata 1104 | 805 # A.palmata 1104 |
801 # genetic_coral_species_call coral_mlg_rep_sample_id bcoral_genet_id | 814 # genetic_coral_species_call coral_mlg_rep_sample_id bcoral_genet_id |
802 # A.palmata 1104 <NA> | 815 # A.palmata 1104 <NA> |
803 genotype_table_tibble <- prep_genotype_table_tibble %>% | 816 genotype_table_tibble <- prep_genotype_table_tibble %>% |
804 left_join(affy_metadata_data_frame %>% | 817 left_join(affy_metadata_data_frame %>% |
805 select("user_specimen_id", "bcoral_genet_id"), | 818 select("user_specimen_id", "bcoral_genet_id"), |
806 by='user_specimen_id'); | 819 by='user_specimen_id') %>% |
820 drop_na(coral_mlg_rep_sample_id); | |
807 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble); | 821 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble); |
808 | 822 |
809 # Output the file needed for populating the person table. | 823 # Output the file needed for populating the person table. |
810 person_table_data_frame <- data.frame(matrix(ncol=4, nrow=num_rows)); | 824 person_table_data_frame <- data.frame(matrix(ncol=4, nrow=num_rows)); |
811 colnames(person_table_data_frame) <- c("last_name", "first_name", "organization", "email"); | 825 colnames(person_table_data_frame) <- c("last_name", "first_name", "organization", "email"); |