diff pre_process_protein_name_set.R @ 32:63008bdf576e draft

Uploaded
author bornea
date Tue, 26 Apr 2016 17:16:03 -0400
parents e6e456d3ac14
children
line wrap: on
line diff
--- a/pre_process_protein_name_set.R	Tue Apr 26 16:21:13 2016 -0400
+++ b/pre_process_protein_name_set.R	Tue Apr 26 17:16:03 2016 -0400
@@ -80,8 +80,10 @@
 	mapped_protein_uniprotonly = str_extract(peptides_txt_mapped_log2$Uniprot,"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") 
 	mapped_protein_uniprot_accession = str_extract(peptides_txt_mapped_log2$Uniprot,"[OPQ][0-9][A-Z0-9]{3}[0-9](-[0-9]+)?|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}(-[0-9]+)?|[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
 	peptides_txt_mapped_log2$mapped_protein = mapped_protein_uniprotonly
+	names_db = str_extract(swissprot_fasta,"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
+	names_db = names_db[!is.na(names_db)]
   # Runs the Tukey function returning completed table.
-  peptides_txt_mapped_log2 = subset(peptides_txt_mapped_log2,mapped_protein %in% swissprot_fasta)
+  peptides_txt_mapped_log2 = subset(peptides_txt_mapped_log2,mapped_protein %in% names_db)
   if (nrow(peptides_txt_mapped_log2) == 0) {
     print("Uniprot Database does not have any of the proteins in the peptides file")
     quit()