Mercurial > repos > bornea > saint_preprocessing
diff pre_process_protein_name_set.R @ 32:63008bdf576e draft
Uploaded
author | bornea |
---|---|
date | Tue, 26 Apr 2016 17:16:03 -0400 |
parents | e6e456d3ac14 |
children |
line wrap: on
line diff
--- a/pre_process_protein_name_set.R Tue Apr 26 16:21:13 2016 -0400
+++ b/pre_process_protein_name_set.R Tue Apr 26 17:16:03 2016 -0400
@@ -80,8 +80,10 @@
 mapped_protein_uniprotonly = str_extract(peptides_txt_mapped_log2$Uniprot,"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
 mapped_protein_uniprot_accession = str_extract(peptides_txt_mapped_log2$Uniprot,"[OPQ][0-9][A-Z0-9]{3}[0-9](-[0-9]+)?|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}(-[0-9]+)?|[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
 peptides_txt_mapped_log2$mapped_protein = mapped_protein_uniprotonly
+ names_db = str_extract(swissprot_fasta,"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
+ names_db = names_db[!is.na(names_db)]
 # Runs the Tukey function returning completed table.
- peptides_txt_mapped_log2 = subset(peptides_txt_mapped_log2,mapped_protein %in% swissprot_fasta)
+ peptides_txt_mapped_log2 = subset(peptides_txt_mapped_log2,mapped_protein %in% names_db)
 if (nrow(peptides_txt_mapped_log2) == 0) {
 print("Uniprot Database does not have any of the proteins in the peptides file")
 quit()