changeset 1:1236ee08ccb8 draft

planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author proteore
date Fri, 16 Feb 2018 03:40:36 -0500
parents d89c09253c8d
children 58a8ddd58dde
files goprofiles.R goprofiles.xml test-data/GO_Profile_diagram_outputs__profile.BP.pdf test-data/GO_Profile_diagram_outputs__profile.CC.pdf test-data/GO_Profile_diagram_outputs__profile.MF.pdf test-data/ID_Converter_FKW_Lacombe_et_al_2017_OK.txt test-data/UnipIDs.txt test-data/profile.BP.pdf test-data/profile.CC.pdf test-data/profile.MF.pdf
diffstat 10 files changed, 295 insertions(+), 174 deletions(-) [+]
line wrap: on
line diff
--- a/goprofiles.R	Sun Nov 26 19:19:39 2017 -0500
+++ b/goprofiles.R	Fri Feb 16 03:40:36 2018 -0500
@@ -5,16 +5,12 @@
 # Read file and return file content as data.frame?
 readfile = function(filename, header) {
   if (header == "true") {
-    # Read only the first two lines of the files as data (without headers):
+    # Read only the first line of the files as data (without headers):
     headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
-    #print("header")
-    #print(headers)
-    # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here :
-    #headers_names <- sapply(headers, paste, collapse = "_")
-    #print(headers_names)
-    #Read the data of the files (skipping the first 2 rows):
+    #Read the data of the files (skipping the first row):
     file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
-    #print(file[1,])
+    # Remove empty rows
+    file <- file[!apply(is.na(file) | file == "", 1, all),]
     #And assign the headers of step two to the data:
     names(file) <- headers
   }
@@ -24,10 +20,6 @@
   return(file)
 }
 
-#filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
-#test = readfile(filename)
-#str(test)
-#str(test$Gene.names)
 getprofile = function(ids, id_type, level, duplicate) {
   ####################################################################
   # Arguments
@@ -64,27 +56,6 @@
     print("IDs unable to convert to ENTREZID: ")
     print(NAs)
   }
-  #print(genes_ids)
-  # Convert Protein IDs into entrez ids
-  
-  # for (i in 1:length(id$UNIPROT)) {
-  #   print(i)
-  #   if (is.na(id[[2]][i])) {
-  #     print(id[[2]][i])
-  #   }
-  # }
-  # a = id[which(id$ENTREZID == "NA"),]
-  # print(a)
-  # print(a$UNIPROT)
-  #print(id[[1]][which(is.na(id$ENTREZID))])
-  #print(genes_ids)
-  # for (gene in genes) {
-  #   #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
-  #   id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
-  #   print(id)
-  #   genes_ids = append(genes_ids, id$ENTREZID)
-  # }
-  #print(genes_ids)
   
   # Create basic profiles
   profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
@@ -172,103 +143,117 @@
 }
 
 goprofiles = function() {
-  args = commandArgs(trailingOnly = TRUE)
-  #print(args)
-  # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
-  if (length(args) != 9) {
-    stop("Not enough/Too many arguments", call. = FALSE)
+  args <- commandArgs(TRUE)
+  if(length(args)<1) {
+    args <- c("--help")
   }
-  else {
-    input_type = args[2]
-    if (input_type == "text") {
-      input = strsplit(args[1], "\\s+")[[1]]
-    }
-    else if (input_type == "file") {
-      filename = strsplit(args[1], ",")[[1]][1]
-      ncol = strsplit(args[1], ",")[[1]][2]
-      # Check ncol
-      if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
-        stop("Please enter an integer for level")
-      }
-      else {
-        ncol = as.numeric(gsub("c", "", ncol))
-      }
-      header = strsplit(args[1], ",")[[1]][3]
-      # Get file content
-      file = readfile(filename, header)
-      # Extract Protein IDs list
-      input = c()
-      for (row in as.character(file[,ncol])) {
-        input = c(input, strsplit(row, ";")[[1]][1])
-      }
-    }
-    id_type = args[3]
-    ontoopt = strsplit(args[4], ",")[[1]]
-    #print(ontoopt)
-    #plotopt = strsplit(args[3], ",")
-    plotopt = args[5]
-    level = args[6]
-    per = as.logical(args[7])
-    title = args[8]
-    duplicate = args[9]
-
-    profiles = getprofile(input, id_type, level, duplicate)
-    profile.CC = profiles[1]
-    #print(profile.CC)
-    profile.MF = profiles[2]
-    #print(profile.MF)
-    profile.BP = profiles[3]
-    #print(profile.BP)
-    profile.ALL = profiles[-3:-1]
-    #print(profile.ALL)
-    #c(profile.ALL, profile.CC, profile.MF, profile.BP)
-    if ("CC" %in% ontoopt) {
-      if (grepl("PNG", plotopt)) {
-        plotPNG(profile.CC=profile.CC, per=per, title=title)
-      }
-      if (grepl("JPEG", plotopt)) {
-        plotJPEG(profile.CC = profile.CC, per=per, title=title)
-      }
-      if (grepl("PDF", plotopt)) {
-        plotPDF(profile.CC = profile.CC, per=per, title=title)
-      }
-    }
-    if ("MF" %in% ontoopt) {
-      if (grepl("PNG", plotopt)) {
-        plotPNG(profile.MF = profile.MF, per=per, title=title)
-      }
-      if (grepl("JPEG", plotopt)) {
-        plotJPEG(profile.MF = profile.MF, per=per, title=title)
-      }
-      if (grepl("PDF", plotopt)) {
-        plotPDF(profile.MF = profile.MF, per=per, title=title)
-      }
-    }
-    if ("BP" %in% ontoopt) {
-      if (grepl("PNG", plotopt)) {
-        plotPNG(profile.BP = profile.BP, per=per, title=title)
-      }
-      if (grepl("JPEG", plotopt)) {
-        plotJPEG(profile.BP = profile.BP, per=per, title=title)
-      }
-      if (grepl("PDF", plotopt)) {
-        plotPDF(profile.BP = profile.BP, per=per, title=title)
-      }
-    }
-    
-    #if (grepl("PNG", plotopt)) {
-    # plotPNG(profile.ALL = profile.ALL, per=per, title=title)
-    #}
-    #if (grepl("JPEG", plotopt)) {
-    # plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
-    #}
-    #if (grepl("PDF", plotopt)) {
-    # plotPDF(profile.ALL = profile.ALL, per=per, title=title)
-    #}
+  
+  # Help section
+  if("--help" %in% args) {
+    cat("GO Profiles
+    Arguments:
+        --input_type: type of input, 'text' (list of IDs) or 'file' (filename)
+        --input: list of IDs, or path of the input file
+        --ncol: the column of the input file which contains the IDs (e.g. c1)
+        --header: true/false if your file contains a header
+        --id_type: the type of input IDs (UniProt/Entrez)
+        --onto_opt: comma-separated ontology options (CC,MF,BP)
+        --plot_opt: plot extension options (PDF/JPEG/PNG)
+        --level: 1-3
+        --per: plot relative instead of absolute frequencies (true/false)
+        --title: title of the plot
+        --duplicate: remove duplicate input IDs (true/false)
+        --text_output: text output filename \n")
+    q(save="no")
   }
   
+  # Parse arguments
+  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
+  args <- as.list(as.character(argsDF$V2))
+  names(args) <- argsDF$V1
+
+  input_type = args$input_type
+  if (input_type == "text") {
+    input = strsplit(args$input, " ")[[1]]
+  }
+  else if (input_type == "file") {
+    filename = args$input
+    ncol = args$ncol
+    # Check ncol
+    if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
+      stop("Please enter an integer for ncol")
+    }
+    else {
+      ncol = as.numeric(gsub("c", "", ncol))
+    }
+    header = args$header
+    # Get file content
+    file = readfile(filename, header)
+    # Extract Protein IDs list
+    input = c()
+    for (row in as.character(file[,ncol])) {
+      input = c(input, strsplit(row, ";")[[1]][1])
+    }
+  }
+  id_type = args$id_type
+  ontoopt = strsplit(args$onto_opt, ",")[[1]]
+  #print(ontoopt)
+  #plotopt = strsplit(args[3], ",")
+  plotopt = args$plot_opt
+  level = args$level
+  per = as.logical(args$per)
+  title = args$title
+  duplicate = args$duplicate
+  text_output = args$text_output
+
+  profiles = getprofile(input, id_type, level, duplicate)
+  profile.CC = profiles[1]
+  #print(profile.CC)
+  profile.MF = profiles[2]
+  #print(profile.MF)
+  profile.BP = profiles[3]
+  #print(profile.BP)
+  profile.ALL = profiles[-3:-1]
+  #print(profile.ALL)
+  #c(profile.ALL, profile.CC, profile.MF, profile.BP)
+    
+  if ("CC" %in% ontoopt) {
+    write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+    if (grepl("PNG", plotopt)) {
+      plotPNG(profile.CC=profile.CC, per=per, title=title)
+    }
+    if (grepl("JPEG", plotopt)) {
+      plotJPEG(profile.CC = profile.CC, per=per, title=title)
+    }
+    if (grepl("PDF", plotopt)) {
+      plotPDF(profile.CC = profile.CC, per=per, title=title)
+    }
+  }
+  if ("MF" %in% ontoopt) {
+    write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+    if (grepl("PNG", plotopt)) {
+      plotPNG(profile.MF = profile.MF, per=per, title=title)
+    }
+    if (grepl("JPEG", plotopt)) {
+      plotJPEG(profile.MF = profile.MF, per=per, title=title)
+    }
+    if (grepl("PDF", plotopt)) {
+      plotPDF(profile.MF = profile.MF, per=per, title=title)
+    }
+  }
+  if ("BP" %in% ontoopt) {
+    write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+    if (grepl("PNG", plotopt)) {
+      plotPNG(profile.BP = profile.BP, per=per, title=title)
+    }
+    if (grepl("JPEG", plotopt)) {
+      plotJPEG(profile.BP = profile.BP, per=per, title=title)
+    }
+    if (grepl("PDF", plotopt)) {
+      plotPDF(profile.BP = profile.BP, per=per, title=title)
+    }
+  }
 }
 
 goprofiles()
-
-#Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"
--- a/goprofiles.xml	Sun Nov 26 19:19:39 2017 -0500
+++ b/goprofiles.xml	Fri Feb 16 03:40:36 2018 -0500
@@ -5,7 +5,7 @@
         <requirement type="package" version="3.4.1">R</requirement>
         <requirement type="package" version="3.3.0">bioconductor-org.hs.eg.db</requirement>
         <requirement type="package" version="1.38.0">bioconductor-annotationdbi</requirement>
-        <requirement type="package" version="2.34.0">bioconductor-biobase</requirement>
+        <requirement type="package" version="2.38.0">bioconductor-biobase</requirement>
         <requirement type="package" version="1.38.0">goprofiles</requirement>
     </requirements>
     <stdio>
@@ -14,29 +14,35 @@
     <command><![CDATA[
         Rscript $__tool_directory__/goprofiles.R
         #if $input.ids == "text"
-            "$input.text" "text"
-        #else 
-            "$input.file,$input.ncol,$input.header" "file"
+            --input_type="text"
+            --input="$input.text"
+        #else
+            --input_type="file"
+            --input="$input.file"
+            --ncol="$input.ncol"
+            --header="$input.header"
         #end if
         
-        $input.id_type
+        --id_type="$input.id_type"
+        
+        --onto_opt="$onto_opt"
         
-        $onto_opt
+        --plot_opt="$opt.plot_opt"
         
-        $opt.plot_opt
+        --level="$level"
         
-        $level
+        --per="$per"
         
-        $per
+        --title="$title"
         
-        "$title"
-        
-        $duplicate
+        --duplicate="$duplicate"
+
+        --text_output="$text_output"
 
     ]]></command>
     <inputs>
         <conditional name="input" >
-            <param name="ids" type="select" label="Provide your Entrez Gene or UniProt identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
+            <param name="ids" type="select" label="Enter your ID list (only Entrez Gene ID or UniProt accession number allowed)" help="Copy/paste or ID list from a file (e.g. table)" >
                 <option value="text">Copy/paste your identifiers</option>
                 <option value="file">Input file containing your identifiers</option>
             </param>
@@ -53,7 +59,7 @@
                 </param>
                 <param name="id_type" type="select" label="Please select the type of your IDs list" >
                     <option value="Entrez">Entrez Gene ID</option>
-                    <option value="UniProt">UniProt protein ID</option>
+                    <option value="UniProt">UniProt protein accession number</option>
                 </param>
             </when>
             <when value="file" >
@@ -93,16 +99,18 @@
 	        <discover_datasets pattern="(?P&lt;designation&gt;.+\.jpeg)" ext="jpg" />
 	        <discover_datasets pattern="(?P&lt;designation&gt;.+\.pdf)" ext="pdf" />
 	    </collection>
+        <data name="text_output" format="tabular" label="GO Profile text output" />
     </outputs>
     <tests>
         <test>
             <conditional name="input">
                 <param name="ids" value="file" />
-                <param name="file" value="UnipIDs.txt" />
+                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt" />
                 <param name="ncol" value="c1" />
-                <param name="header" value="false" /> 
+                <param name="header" value="true" /> 
                 <param name="id_type" value="UniProt" />              
             </conditional>
+            <param name="duplicate" value="false"/>
             <param name="onto_opt" value="CC,MF,BP" />
             <param name="level" value="2" />
             <param name="per" value="true" />
@@ -111,14 +119,15 @@
                 <param name="plot_opt" value="PDF" />
             </section>
             <output_collection name="output" type="list" >
-                <element name="profile.BP.pdf" file="profile.BP.pdf" ftype="pdf" />
-                <element name="profile.MF.pdf" file="profile.MF.pdf" ftype="pdf" />
-                <element name="profile.CC.pdf" file="profile.CC.pdf" ftype="pdf" />
+                <element name="GO_Profile_diagram_outputs__profile.BP.pdf" file="GO_Profile_diagram_outputs__profile.BP.pdf" ftype="pdf" />
+                <element name="GO_Profile_diagram_outputs__profile.CC.pdf" file="GO_Profile_diagram_outputs__profile.CC.pdf" ftype="pdf" />
+                <element name="GO_Profile_diagram_outputs__profile.MF.pdf" file="GO_Profile_diagram_outputs__profile.MF.pdf" ftype="pdf" />
             </output_collection>
+            <output name="text_output" file="GO_Profile_text_output.txt"/>
         </test>
     </tests>
     <help><![CDATA[
+This tool, based on the goProfiles R package, performs statistical analysis of functional profiles. It is based on GO ontology and considers either a gene set ('Entrez' Identifiers) or a protein set (UniProt accession number) as input. 
+This tool, based on the goProfiles R package, performs statistical analysis of functional profiles. It is based on GO ontology and considers either a gene set ('Entrez’ Identifiers) or a protein set (Uniprot accession number) as input. 
 
 You can choose one or more GO categories: 
 
@@ -126,11 +135,11 @@
 * Cellular Component (CC) 
 * Molecular Function (MF) 
 
-Functional profile at a given GO level is obtained by counting the number of identifiers having a hit in each category of this level (2 by default). Results are displayed as bar plots (with absolute or relative frequencies) and can be exported in pdf, png and jpeg formats.  
+Functional profile at a given GO level is obtained by counting the number of identifiers having a hit in each category of this level (2 by default). Results are displayed as bar plots (with absolute or relative frequencies) and can be exported in pdf, png and jpeg formats; textual output with GO terms and their computed frequencies is also provided.  
 
 For more details about GoProfiles, please read: Salicrú et al. Comparison of lists of genes based on functional profiles. BMC Bioinformatics. 2011;12:401.(https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-401)  
 
-If your type of identifiers is not supported (i.e. different form Uniprot and Entrez), please use the **ID Converter** component in the ProteoRE section to convert your list of IDs first.
+If your type of identifiers is not supported (i.e. different from Uniprot and Entrez), please use the **ID Converter** tool in the ProteoRE section to convert your list of IDs first.
 
 -----
 
Binary file test-data/GO_Profile_diagram_outputs__profile.BP.pdf has changed
Binary file test-data/GO_Profile_diagram_outputs__profile.CC.pdf has changed
Binary file test-data/GO_Profile_diagram_outputs__profile.MF.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID_Converter_FKW_Lacombe_et_al_2017_OK.txt	Fri Feb 16 03:40:36 2018 -0500
@@ -0,0 +1,152 @@
+Protein accession number (UniProt)	Protein name	Number of peptides (razor + unique)	neXtProt_ID	UniProt.ID	GeneID	MIM	Ensembl
+P15924	Desmoplakin	69	NX_P15924	DESP_HUMAN	1832	125647; 605676; 607450; 607655; 609638; 612908; 615821	ENSG00000096696
+P02538	Keratin, type II cytoskeletal 6A	53	NX_P02538	K2C6A_HUMAN	3853	148041; 615726	ENSG00000205420
+P02768	Serum albumin	44	NX_P02768	ALBU_HUMAN	213	103600; 615999; 616000	ENSG00000163631
+P08779	Keratin, type I cytoskeletal 16	29	NX_P08779	K1C16_HUMAN	3868	148067; 167200; 613000	ENSG00000186832
+Q02413	Desmoglein-1	24	NX_Q02413	DSG1_HUMAN	1828	125670; 148700; 615508	ENSG00000134760
+P07355	Annexin A2;Putative annexin A2-like protein	22	NX_P07355	ANXA2_HUMAN	302	151740	ENSG00000182718
+P14923	Junction plakoglobin	22	NX_P14923	PLAK_HUMAN	3728	173325; 601214; 611528	ENSG00000173801
+P02788	Lactotransferrin	21	NX_P02788	TRFL_HUMAN	4057	150210	ENSG00000012223
+Q9HC84	Mucin-5B	21	NX_Q9HC84	MUC5B_HUMAN	727897	178500; 600770	ENSG00000117983
+P29508	Serpin B3	20	NX_P29508	SPB3_HUMAN	6317	600517	ENSG00000057149
+P63261	Actin, cytoplasmic 2	19	NX_P63261	ACTG_HUMAN	71	102560; 604717; 614583	ENSG00000184009
+Q8N1N4	Keratin, type II cytoskeletal 78	18	NX_Q8N1N4	K2C78_HUMAN	196374	611159	ENSG00000170423
+Q04695	Keratin, type I cytoskeletal 17	18	NX_Q04695	K1C17_HUMAN	3872	148069; 167210; 184500	ENSG00000128422
+P01876	Ig alpha-1 chain C region	16	NX_P01876	IGHA1_HUMAN	NA	146900	ENSG00000211895; ENSG00000282633
+Q01469	Fatty acid-binding protein 5, epidermal	15	NX_Q01469	FABP5_HUMAN	2171	605168	ENSG00000164687
+P31944	Caspase-14	15	NX_P31944	CASPE_HUMAN	23581	605848; 617320	ENSG00000105141
+P01833	Polymeric immunoglobulin receptor	15	NX_P01833	PIGR_HUMAN	5284	173880	ENSG00000162896
+P06733	Alpha-enolase	15	NX_P06733	ENOA_HUMAN	2023	172430	ENSG00000074800
+P25311	Zinc-alpha-2-glycoprotein	15	NX_P25311	ZA2G_HUMAN	563	194460	ENSG00000160862
+Q15149	Plectin	15	NX_Q15149	PLEC_HUMAN	5339	131950; 226670; 601282; 612138; 613723; 616487	ENSG00000178209
+P19013	Keratin, type II cytoskeletal 4	13	NX_P19013	K2C4_HUMAN	NA	123940; 193900	ENSG00000170477
+Q6KB66	Keratin, type II cytoskeletal 80	13	NX_Q6KB66	K2C80_HUMAN	144501	611161	ENSG00000167767
+Q08188	Protein-glutamine gamma-glutamyltransferase E	12	NX_Q08188	TGM3_HUMAN	7053	600238; 617251	ENSG00000125780
+P13646	Keratin, type I cytoskeletal 13	11	NX_P13646	K1C13_HUMAN	3860	148065; 615785	ENSG00000171401
+Q86YZ3	Hornerin	11	NX_Q86YZ3	HORN_HUMAN	388697	616293	ENSG00000197915
+P04259	Keratin, type II cytoskeletal 6B	10	NX_P04259	K2C6B_HUMAN	3854	148042; 615728	ENSG00000185479
+P02545	Prelamin-A/C;Lamin-A/C	10	NX_P02545	LMNA_HUMAN	4000	115200; 150330; 151660; 159001; 176670; 181350; 212112; 248370; 275210; 605588; 610140; 613205; 616516	ENSG00000160789
+P04083	Annexin A1	10	NX_P04083	ANXA1_HUMAN	301	151690	ENSG00000135046
+P11021	78 kDa glucose-regulated protein	10	NX_P11021	GRP78_HUMAN	3309	138120	ENSG00000044574
+P02787	Serotransferrin	9	NX_P02787	TRFE_HUMAN	7018	190000; 209300	ENSG00000091513
+P04040	Catalase	9	NX_P04040	CATA_HUMAN	847	115500; 614097	ENSG00000121691
+P31151	Protein S100-A7	9	NX_P31151	S10A7_HUMAN	6278	600353	ENSG00000143556
+P31947	14-3-3 protein sigma	9	NX_P31947	1433S_HUMAN	2810	601290	ENSG00000175793
+Q96P63	Serpin B12	9	NX_Q96P63	SPB12_HUMAN	89777	615662	ENSG00000166634
+P14618	Pyruvate kinase PKM	9	NX_P14618	KPYM_HUMAN	5315	179050	ENSG00000067225
+P60174	Triosephosphate isomerase	9	NX_P60174	TPIS_HUMAN	7167	190450; 615512	ENSG00000111669
+Q06830	Peroxiredoxin-1	9	NX_Q06830	PRDX1_HUMAN	5052	176763	ENSG00000117450
+P01040	Cystatin-A	8	NX_P01040	CYTA_HUMAN	1475	184600; 607936	ENSG00000121552
+P05089	Arginase-1	8	NX_P05089	ARGI1_HUMAN	383	207800; 608313	ENSG00000118520
+P01834	Ig kappa chain C region	8	NX_P01834	IGKC_HUMAN	NA	147200; 614102	NA
+P04406	Glyceraldehyde-3-phosphate dehydrogenase	8	NX_P04406	G3P_HUMAN	2597	138400	ENSG00000111640
+P0DMV9	Heat shock 70 kDa protein 1B	8	NX_P0DMV9	HS71B_HUMAN	3303; 3304	140550; 603012	ENSG00000204388; ENSG00000224501; ENSG00000212866; ENSG00000231555; ENSG00000232804
+P13639	Elongation factor 2	8	NX_P13639	EF2_HUMAN	1938	130610; 609306	ENSG00000167658
+P35579	Myosin-9	8	NX_P35579	MYH9_HUMAN	4627	153640; 153650; 155100; 160775; 600208; 603622; 605249	ENSG00000100345
+P68371	Tubulin beta-4B chain	8	NX_P68371	TBB4B_HUMAN	10383	602660	ENSG00000188229
+Q8WVV4	Protein POF1B	8	NX_Q8WVV4	POF1B_HUMAN	79983	300603; 300604	ENSG00000124429
+O75635	Serpin B7	7	NX_O75635	SPB7_HUMAN	8710	603357; 615598	ENSG00000166396
+P01857	Ig gamma-1 chain C region	7	NX_P01857	IGHG1_HUMAN	NA	147100; 254500	ENSG00000211896; ENSG00000277633
+P61626	Lysozyme C	7	NX_P61626	LYSC_HUMAN	4069	105200; 153450	ENSG00000090382
+P68363	Tubulin alpha-1B chain	7	NX_P68363	TBA1B_HUMAN	10376	602530	ENSG00000123416
+P01009	Alpha-1-antitrypsin;Short peptide from AAT	6	NX_P01009	A1AT_HUMAN	5265	107400; 613490	ENSG00000197249
+P07900	Heat shock protein HSP 90-alpha	6	NX_P07900	HS90A_HUMAN	3320	140571	ENSG00000080824
+Q9NZH8	Interleukin-36 gamma	6	NX_Q9NZH8	IL36G_HUMAN	56300	605542	ENSG00000136688
+O43707	Alpha-actinin-4;Alpha-actinin-1	6	NX_O43707	ACTN4_HUMAN	81	603278; 604638	ENSG00000130402; ENSG00000282844
+O75223	Gamma-glutamylcyclotransferase	6	NX_O75223	GGCT_HUMAN	79017	137170	ENSG00000006625
+P00338	L-lactate dehydrogenase A chain	6	NX_P00338	LDHA_HUMAN	3939	150000; 612933	ENSG00000134333
+P07339	Cathepsin D	6	NX_P07339	CATD_HUMAN	1509	116840; 610127	ENSG00000117984
+P62987	Ubiquitin-60S ribosomal protein L40	6	NX_P62987	RL40_HUMAN	7311	191321	ENSG00000221983
+P10599	Thioredoxin	6	NX_P10599	THIO_HUMAN	7295	187700	ENSG00000136810
+Q9UGM3	Deleted in malignant brain tumors 1 protein	6	NX_Q9UGM3	DMBT1_HUMAN	1755	137800; 601969	ENSG00000187908
+Q9UI42	Carboxypeptidase A4	6	NX_Q9UI42	CBPA4_HUMAN	51200	607635	ENSG00000128510
+P47929	Galectin-7	5	NX_P47929	LEG7_HUMAN	3963; 653499	600615; 617139	ENSG00000178934; ENSG00000205076; ENSG00000282902; ENSG00000283082
+Q13867	Bleomycin hydrolase	5	NX_Q13867	BLMH_HUMAN	642	602403	ENSG00000108578
+Q6P4A8	Phospholipase B-like 1	5	NX_Q6P4A8	PLBL1_HUMAN	79887	NA	ENSG00000121316
+O75369	Filamin-B	5	NX_O75369	FLNB_HUMAN	2317	108720; 108721; 112310; 150250; 272460; 603381	ENSG00000136068
+P00441	Superoxide dismutase [Cu-Zn]	5	NX_P00441	SODC_HUMAN	6647	105400; 147450	ENSG00000142168
+P04792	Heat shock protein beta-1	5	NX_P04792	HSPB1_HUMAN	3315	602195; 606595; 608634	ENSG00000106211
+P11142	Heat shock cognate 71 kDa protein	5	NX_P11142	HSP7C_HUMAN	3312	600816	ENSG00000109971
+P58107	Epiplakin	5	NX_P58107	EPIPL_HUMAN	83481	607553	NA
+P60842	Eukaryotic initiation factor 4A-I	5	NX_P60842	IF4A1_HUMAN	1973	602641	ENSG00000161960
+P62937	Peptidyl-prolyl cis-trans isomerase A	5	NX_P62937	PPIA_HUMAN	5478	123840	ENSG00000196262
+P63104	14-3-3 protein zeta/delta	5	NX_P63104	1433Z_HUMAN	7534	601288	ENSG00000164924
+Q92820	Gamma-glutamyl hydrolase	5	NX_Q92820	GGH_HUMAN	8836	601509	ENSG00000137563
+O75342	Arachidonate 12-lipoxygenase, 12R-type	4	NX_O75342	LX12B_HUMAN	242	242100; 603741	ENSG00000179477
+P09211	Glutathione S-transferase P	4	NX_P09211	GSTP1_HUMAN	2950	134660	ENSG00000084207
+P31025	Lipocalin-1	4	NX_P31025	LCN1_HUMAN	3933	151675	ENSG00000160349
+P48594	Serpin B4	4	NX_P48594	SPB4_HUMAN	6318	600518	ENSG00000206073
+Q14574	Desmocollin-3	4	NX_Q14574	DSC3_HUMAN	1825	600271; 613102	ENSG00000134762
+Q5T750	Skin-specific protein 32	4	NX_Q5T750	XP32_HUMAN	100129271	NA	ENSG00000198854
+Q6UWP8	Suprabasin	4	NX_Q6UWP8	SBSN_HUMAN	374897	609969	ENSG00000189001
+O60911	Cathepsin L2	4	NX_O60911	CATL2_HUMAN	1515	603308	ENSG00000136943
+P00558	Phosphoglycerate kinase 1	4	NX_P00558	PGK1_HUMAN	5230	300653; 311800	ENSG00000102144
+P04075	Fructose-bisphosphate aldolase A	4	NX_P04075	ALDOA_HUMAN	226	103850; 611881	ENSG00000149925
+P07384	Calpain-1 catalytic subunit	4	NX_P07384	CAN1_HUMAN	823	114220; 616907	ENSG00000014216
+P0CG05	Ig lambda-2 chain C regions	4	NA	NA	NA	NA	NA
+P18206	Vinculin	4	NX_P18206	VINC_HUMAN	7414	193065; 611407; 613255	ENSG00000035403
+P62258	14-3-3 protein epsilon	4	NX_P62258	1433E_HUMAN	7531	605066	ENSG00000108953; ENSG00000274474
+P68871	Hemoglobin subunit beta	4	NX_P68871	HBB_HUMAN	3043	140700; 141900; 603902; 603903; 611162; 613985	ENSG00000244734
+Q9C075	Keratin, type I cytoskeletal 23	4	NX_Q9C075	K1C23_HUMAN	25984	606194	ENSG00000108244; ENSG00000263309
+A8K2U0	Alpha-2-macroglobulin-like protein 1	3	NX_A8K2U0	A2ML1_HUMAN	144568	610627	ENSG00000166535
+P00738	Haptoglobin	3	NX_P00738	HPT_HUMAN	3240	140100; 614081	ENSG00000257017
+P01011	Alpha-1-antichymotrypsin	3	NX_P01011	AACT_HUMAN	12	107280	ENSG00000196136
+P02763	Alpha-1-acid glycoprotein 1	3	NX_P02763	A1AG1_HUMAN	5004	138600	ENSG00000229314
+P18510	Interleukin-1 receptor antagonist protein	3	NX_P18510	IL1RA_HUMAN	3557	147679; 612628; 612852	ENSG00000136689
+P22528	Cornifin-B	3	NX_P22528	SPR1B_HUMAN	6699	182266	ENSG00000169469
+P30740	Leukocyte elastase inhibitor	3	NX_P30740	ILEU_HUMAN	1992	130135	ENSG00000021355
+P80188	Neutrophil gelatinase-associated lipocalin	3	NX_P80188	NGAL_HUMAN	3934	600181	ENSG00000148346
+Q15828	Cystatin-M	3	NX_Q15828	CYTM_HUMAN	1474	601891	ENSG00000175315
+Q9HCY8	Protein S100-A14	3	NX_Q9HCY8	S10AE_HUMAN	57402	607986	ENSG00000189334
+P01623	Ig kappa chain V-III region	3	NA	NA	NA	NA	NA
+P01877	Ig alpha-2 chain C region	3	NX_P01877	IGHA2_HUMAN	NA	147000	ENSG00000211890
+P06396	Gelsolin	3	NX_P06396	GELS_HUMAN	2934	105120; 137350	ENSG00000148180
+P14735	Insulin-degrading enzyme	3	NX_P14735	IDE_HUMAN	3416	146680	ENSG00000119912
+P20933	N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase	3	NX_P20933	ASPG_HUMAN	175	208400; 613228	ENSG00000038002
+P25788	Proteasome subunit alpha type-3	3	NX_P25788	PSA3_HUMAN	5684	176843; 176845	ENSG00000100567
+P26641	Elongation factor 1-gamma	3	NX_P26641	EF1G_HUMAN	1937	130593	ENSG00000254772
+P36952	Serpin B5	3	NX_P36952	SPB5_HUMAN	5268	154790	ENSG00000206075
+P40926	Malate dehydrogenase, mitochondrial	3	NX_P40926	MDHM_HUMAN	4191	154100; 617339	ENSG00000146701
+Q9Y6R7	IgGFc-binding protein	3	NX_Q9Y6R7	FCGBP_HUMAN	8857	617553	ENSG00000281123
+O95274	Ly6/PLAUR domain-containing protein 3	2	NX_O95274	LYPD3_HUMAN	27076	609484	ENSG00000124466
+P00491	Purine nucleoside phosphorylase	2	NX_P00491	PNPH_HUMAN	4860	164050; 613179	ENSG00000198805
+P04080	Cystatin-B	2	NX_P04080	CYTB_HUMAN	1476	254800; 601145	ENSG00000160213
+P09972	Fructose-bisphosphate aldolase C	2	NX_P09972	ALDOC_HUMAN	230	103870	ENSG00000109107
+P19012	Keratin, type I cytoskeletal 15	2	NX_P19012	K1C15_HUMAN	3866	148030	ENSG00000171346
+P20930	Filaggrin	2	NX_P20930	FILA_HUMAN	2312	135940; 146700; 605803	ENSG00000143631
+Q96FX8	p53 apoptosis effector related to PMP-22	2	NX_Q96FX8	PERP_HUMAN	64065	609301	ENSG00000112378
+Q9UIV8	Serpin B13	2	NX_Q9UIV8	SPB13_HUMAN	5275	604445	ENSG00000197641
+P01625	Ig kappa chain V-IV region Len	2	NA	NA	NA	NA	NA
+P01765	Ig heavy chain V-III region TIL	2	NA	NA	NA	NA	NA
+P01766	Ig heavy chain V-III region BRO	2	NX_P01766	HV313_HUMAN	NA	NA	ENSG00000211942; ENSG00000282286
+P01860	Ig gamma-3 chain C region	2	NX_P01860	IGHG3_HUMAN	NA	147120	NA
+P01871	Ig mu chain C region	2	NX_P01871	IGHM_HUMAN	NA	147020; 601495	ENSG00000211899; ENSG00000282657
+P05090	Apolipoprotein D	2	NX_P05090	APOD_HUMAN	347	107740	ENSG00000189058
+P06870	Kallikrein-1	2	NX_P06870	KLK1_HUMAN	3816	147910; 615953	ENSG00000167748
+P07858	Cathepsin B	2	NX_P07858	CATB_HUMAN	1508	116810	ENSG00000164733
+P08865	40S ribosomal protein SA	2	NX_P08865	RSSA_HUMAN	3921	150370; 271400	ENSG00000168028
+P11279	Lysosome-associated membrane glycoprotein 1	2	NX_P11279	LAMP1_HUMAN	3916	153330	ENSG00000185896
+P13473	Lysosome-associated membrane glycoprotein 2	2	NX_P13473	LAMP2_HUMAN	3920	300257; 309060	ENSG00000005893
+P19971	Thymidine phosphorylase	2	NX_P19971	TYPH_HUMAN	1890	131222; 603041	ENSG00000025708
+P23284	Peptidyl-prolyl cis-trans isomerase B	2	NX_P23284	PPIB_HUMAN	5479	123841; 259440	ENSG00000166794
+P23396	40S ribosomal protein S3	2	NX_P23396	RS3_HUMAN	6188	600454	ENSG00000149273
+P25705	ATP synthase subunit alpha, mitochondrial	2	NX_P25705	ATPA_HUMAN	498	164360; 615228; 616045	ENSG00000152234
+P27482	Calmodulin-like protein 3	2	NX_P27482	CALL3_HUMAN	810	114184	ENSG00000178363
+P31949	Protein S100-A11	2	NX_P31949	S10AB_HUMAN	6282	603114	ENSG00000163191
+P40121	Macrophage-capping protein	2	NX_P40121	CAPG_HUMAN	822	153615	ENSG00000042493
+P42357	Histidine ammonia-lyase	2	NX_P42357	HUTH_HUMAN	3034	235800; 609457	ENSG00000084110
+P47756	F-actin-capping protein subunit beta	2	NX_P47756	CAPZB_HUMAN	832	601572	ENSG00000077549
+P48637	Glutathione synthetase	2	NX_P48637	GSHB_HUMAN	2937	231900; 266130; 601002	ENSG00000100983
+P49720	Proteasome subunit beta type-3	2	NX_P49720	PSB3_HUMAN	5691	602176	ENSG00000277791; ENSG00000275903
+P50395	Rab GDP dissociation inhibitor beta	2	NX_P50395	GDIB_HUMAN	2665	600767	ENSG00000057608
+P59998	Actin-related protein 2/3 complex subunit 4	2	NX_P59998	ARPC4_HUMAN	10093	604226	ENSG00000241553
+P61160	Actin-related protein 2	2	NX_P61160	ARP2_HUMAN	10097	604221	ENSG00000138071
+P61916	Epididymal secretory protein E1	2	NX_P61916	NPC2_HUMAN	10577	601015; 607625	ENSG00000119655
+P04745	Alpha-amylase 1	23	NX_P04745	AMY1_HUMAN	276; 277; 278	104700; 104701; 104702	ENSG00000174876; ENSG00000187733; ENSG00000237763
+Q9NZT1	Calmodulin-like protein 5	8	NX_Q9NZT1	CALL5_HUMAN	51806	605183	ENSG00000178372
+P12273	Prolactin-inducible protein	6	NX_P12273	PIP_HUMAN	5304	176720	ENSG00000159763
+Q96DA0	Zymogen granule protein 16 homolog B	5	NX_Q96DA0	ZG16B_HUMAN	124220	NA	ENSG00000162078; ENSG00000283056
+P01036	Cystatin-S	5	NX_P01036	CYTS_HUMAN	1472	123857	ENSG00000101441
+Q8TAX7	Mucin-7	2	NX_Q8TAX7	MUC7_HUMAN	4589	158375; 600807	ENSG00000171195
+P01037	Cystatin-SN	2	NX_P01037	CYTN_HUMAN	1469	123855	ENSG00000170373
+P09228	Cystatin-SA	2	NX_P09228	CYTT_HUMAN	1470	123856	ENSG00000170369
--- a/test-data/UnipIDs.txt	Sun Nov 26 19:19:39 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-P04637
-P08246
-P63244
-P10275
-P00533
-Q14524
-P05067
-P35555
-P35222
-O95273
-P00451
-P38398
-Q05086
-Q12802
-P68871
-P04585
-Q96EB6
-Q9NYL2
-P31749
-P01137
-Q5S007
-Q08379
-P02649
-P35498
-P12931
Binary file test-data/profile.BP.pdf has changed
Binary file test-data/profile.CC.pdf has changed
Binary file test-data/profile.MF.pdf has changed