Mercurial > repos > proteore > proteore_kegg_pathways_coverage

--- a/compute_kegg_pathways.R	Wed Sep 19 05:38:52 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-library(KEGGREST)
-
-get_args <- function(){
-
-  ## Collect arguments
-  args <- commandArgs(TRUE)
-
-  ## Default setting when no arguments passed
-  if(length(args) < 1) {
-    args <- c("--help")
-  }
-
-  ## Help section
-  if("--help" %in% args) {
-    cat("Pathview R script
-    Arguments:
-      --help                  Print this test
-      --input                 tab file
-      --id_list
-id list ',' separated
-      --id_type               type of input ids (uniprot_AC or geneID)
-      --id_column             number og column containg ids of interest
-      --nb_pathways           number of pathways to return
-      --header                boolean
-      --output                output path
-      --ref                  ref file (l.hsa.gene.RData, l.hsa.up.RData, l.mmu.up.Rdata)
-
-      Example:
-      Rscript keggrest.R --input='P31946,P62258' --id_type='uniprot' --id_column 'c1' --header TRUE \n\n")
-
-    q(save="no")
-  }
-
-  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
-  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
-  args <- as.list(as.character(argsDF$V2))
-  names(args) <- argsDF$V1
-
-  return(args)
-}
-
-args <- get_args()
-
-#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/compute_KEGG_pathways/args.Rda")
-#load("/home/dchristiany/proteore_project/ProteoRE/tools/compute_KEGG_pathways/args.Rda")
-
-##function arguments :
-## id.ToMap = input from the user to map on the pathways = list of IDs
-## idType : must be "UNIPROT" or "ENTREZ"
-## org : for the moment can be "Hs" only. Has to evoluate to "Mm"
-
-str2bool <- function(x){
-  if (any(is.element(c("t","true"),tolower(x)))){
-    return (TRUE)
-  }else if (any(is.element(c("f","false"),tolower(x)))){
-    return (FALSE)
-  }else{
-    return(NULL)
-  }
-}
-
-
-read_file <- function(path,header){
-  file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE)
-  if (inherits(file,"try-error")){
-    stop("File not found !")
-  }else{
-    return(file)
-  }
-}
-
-ID2KEGG.Mapping<- function(id.ToMap,ref) {
-
-    ref_ids = get(load(ref))
-    map<-lapply(ref_ids, is.element, unique(id.ToMap))
-    names(map) <- sapply(names(map), function(x) gsub("path:","",x),USE.NAMES = FALSE)    #remove the prefix "path:"
-
-    in.path<-sapply(map, function(x) length(which(x==TRUE)))
-    tot.path<-sapply(map, length)
-
-    ratio<-(as.numeric(in.path[which(in.path!=0)])) / (as.numeric(tot.path[which(in.path!=0)]))
-    ratio <- as.numeric(format(round(ratio*100, 2), nsmall = 2))
-
-    ##useful but LONG
-    ## to do before : in step 1
-    path.names<-names(in.path[which(in.path!=0)])
-    name <- sapply(path.names, function(x) keggGet(x)[[1]]$NAME,USE.NAMES = FALSE)
-
-    res<-data.frame(I(names(in.path[which(in.path!=0)])), I(name), ratio, as.numeric(in.path[which(in.path!=0)]), as.numeric(tot.path[which(in.path!=0)]))
-    res <- res[order(as.numeric(res[,3]),decreasing = TRUE),]
-    colnames(res)<-c("pathway_ID", "Description" , "Ratio IDs mapped/total IDs (%)" ,"nb genes mapped in the pathway", "nb total genes present in the pathway")
-
-    return(res)
-
-}
-
-###setting variables
-header = str2bool(args$header)
-if (!is.null(args$id_list)) {id_list <- strsplit(args$id_list,",")[[1]]}
-if (!is.null(args$input)) {
-  csv <- read_file(args$input,header)
-  ncol <- as.numeric(gsub("c", "" ,args$id_column))
-  id_list <- as.vector(csv[,ncol])
-}
-id_type <- toupper(args$id_type)
-
-#mapping on pathways
-res <- ID2KEGG.Mapping(id_list,args$ref)
-if (nrow(res) > as.numeric(args$nb_pathways)) { res <- res[1:args$nb_pathways,] }
-
-write.table(res, file=args$output, quote=FALSE, sep='\t',row.names = FALSE, col.names = TRUE)
-
--- a/compute_kegg_pathways.xml	Wed Sep 19 05:38:52 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,153 +0,0 @@
-<tool id="compute_kegg_pathways" name="KEGG pathways coverage" version="2018.09.18">
-    <requirements>
-        <requirement type="package" version="1.18.0">bioconductor-keggrest</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-        Rscript $__tool_directory__/compute_kegg_pathways.R
-
-        #if $input.ids == "text"
-            --id_list="$input.txt"
-        #else
-            --input="$input.file"
-            --id_column="$input.ncol"
-            --header="$input.header"
-        #end if
-
-        --id_type="$ref_ids.id_type"
-        --output="$output1"
-        --nb_pathways="$nb_pathways"
-        --ref="$__tool_directory__/$ref_ids.ref_file"
-
-    ]]></command>
-    <inputs>
-        <conditional name="input" >
-            <param name="ids" type="select" label="Provide your identifiers (Uniprot or Entrez gene)" help="Copy/paste or ID list from a file (e.g. table)" >
-                <option value="text">Copy/paste your identifiers</option>
-                <option value="file" selected="true">Input file containing your identifiers</option>
-            </param>
-            <when value="text" >
-                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258' >
-                    <sanitizer invalid_char=''>
-                        <valid initial="string.printable">
-                            <remove value="&apos;"/>
-                        </valid>
-                        <mapping initial="none">
-                            <add source="&apos;" target="__sq__"/>
-                        </mapping>
-                    </sanitizer>
-                </param>
-            </when>
-            <when value="file" >
-                <param name="file" type="data" format="txt,tabular" label="Select a file that contains your list of IDs" help="" />
-                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
-                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
-            </when>
-        </conditional>
-        <conditional name="ref_ids">
-            <param name="id_type" type="select" label="select your identifiers type :">
-                <option value="uniprot">Uniprot Accession number</option>
-                <option value="geneID">Entrez gene ID</option>
-            </param>
-            <when value="uniprot">
-                <param name="ref_file" type="select" label="Select species" >
-                    <options from_data_table="uniprot_kegg_list"/>
-                        <option>Human (Homo sapiens)</option>
-                        <option>Mouse (Mus musculus)</option>
-                </param>
-            </when>
-            <when value="geneID">
-                <param name="ref_file" type="select" label="Select species" >
-                    <options from_data_table="entrez_kegg_list"/>
-                        <option>Human (Homo sapiens)</option>
-                        <option>Mouse (Mus musculus)</option>
-                </param>
-            </when>
-        </conditional>
-        <param type="integer" name="nb_pathways" label="Set the number of pathways to be displayed in the output" value="10" help="pathways are sorted by percent of mapping gene by pathway in descending order"/>
-    </inputs>
-    <outputs>
-        <data name="output1" format="tsv" />
-    </outputs>
-    <help><![CDATA[
-
-This tool computes the number of proteins from your list divided by the total number of protein of each KEGG pathway.
-
-it allows you to identify the signaling pathways that are the most covered by your proteomics dataset.
-
-By default the number of pathway to be displayed is set to 10 and sorted in descending order.
-
-At the moment two species are supported: Human (Homo sapiens) and Mouse (Mus musculus)
-
-**Input:**
-
-Input can be either a list of Uniprot accession number or Entrez gene IDs (copy/paste mode) or a file containing multiple columns but with at least one column Uniprot accession number or Entrez gene IDs. If your input file contains other type of IDs, please use the ID_Converter tool.
-
-**Output:**
-
-The output is a tabular file (.tsv) with the following columns:
-
-* **Pathway_ID:** KEGG pathway identifier (e.g. hsa04970)
-* **Description:** name of the pathway as in KEGG
-* **Ratio IDs mapped/total IDs (%):** percentage of pathway coverage
-* **nb genes mapped in the pathway:** number of genes/proteins of your list mapped on the KEGG pathway
-* **nb total genes present in the pathway:** total number of genes/proteins present in the KEGG pathway
-
------
-
-.. class:: infomark
-
-**Database:**
-
-Pathways and associated Uniprot Accession Number or Gene IDs are collected from KEGGREST package
-
-User manual / Documentation: KEGGREST Bioconductor package https://bioconductor.org/packages/3.1/bioc/html/KEGGREST.html
-
-
------
-
-.. class:: infomark
-
-**Galaxy integration**
-
-David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
-
-Sandra Dérozier, Olivier Rué, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
-
-This work has been partially funded through the French National Agency for Research (ANR) IFB project.
-
-Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
-
------
-
-compute_KEGG_pathways R script
-
-| Arguments:
-| --**help**                  Print this test
-| --**input**                 tab file
-| --**id_list**               id list ',' separated
-| --**id_type**               type of input ids (uniprot_AC or geneID)
-| --**id_column**             number og column containg ids of interest
-| --**org**                   organism : Hs , Mm, ...
-| --**nb_pathways**           number of pathways to return
-| --**header**                boolean
-| --**output**                output path
-| --**ref**                   ref file (l.hsa.gene.RData, l.hsa.up.RData)
-|
-| Example:
-| Rscript compute_KEGG_pathways.R --input='P31946,P62258' --id_type='uniprot' --id_column 'c1' --header TRUE
-|
-
-    ]]></help>
-    <citations>
-        <citation type="bibtex">
-@misc{githubKEGGREST,
-  title = {KEGGREST: Client-side REST access to KEGG},
-  author = {Dan Tenenbaum},
-  year = {2018},
-  note = {R package version 1.18.1},
-  publisher = {GitHub},
-  journal = {GitHub repository},
-  url = {https://github.com/Bioconductor/KEGGREST},
-}</citation>
-    </citations>
-</tool>
--- a/entrez_kegg_list.loc.sample	Wed Sep 19 05:38:52 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-#name	date	organism	value(path)
-Human (Homo sapiens)	27-07-18	hsa	tool-data/l.hsa.gene.RData
-Mouse (Mus musculus)	27-07-18	mmu	tool-data/l.mmu.gene.RData
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kegg_identification.R	Tue Dec 18 10:00:40 2018 -0500
@@ -0,0 +1,203 @@
+options(warn=-1)  #TURN OFF WARNINGS !!!!!!
+
+suppressMessages(library(KEGGREST))
+
+get_args <- function(){
+
+  ## Collect arguments
+  args <- commandArgs(TRUE)
+
+  ## Default setting when no arguments passed
+  if(length(args) < 1) {
+    args <- c("--help")
+  }
+
+  ## Help section
+  if("--help" %in% args) {
+    cat("Pathview R script
+    Arguments:
+      --help                  Print this test
+      --input                 tab file
+      --id_list               id list ',' separated
+      --id_type               type of input ids (kegg-id, uniprot_AC,geneID)
+      --id_column             number og column containg ids of interest
+      --nb_pathways           number of pathways to return
+      --header                boolean
+      --output                output path
+      --species               species used to get specific pathways (hsa,mmu,rno)
+
+      Example:
+      Rscript keggrest.R --input='P31946,P62258' --id_type='uniprot' --id_column 'c1' --header TRUE \n\n")
+
+    q(save="no")
+  }
+
+  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
+  args <- as.list(as.character(argsDF$V2))
+  names(args) <- argsDF$V1
+
+  return(args)
+}
+
+str2bool <- function(x){
+  if (any(is.element(c("t","true"),tolower(x)))){
+    return (TRUE)
+  }else if (any(is.element(c("f","false"),tolower(x)))){
+    return (FALSE)
+  }else{
+    return(NULL)
+  }
+}
+
+read_file <- function(path,header){
+  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
+  if (inherits(file,"try-error")){
+    stop("File not found !")
+  }else{
+    return(file)
+  }
+}
+
+get_pathways_list <- function(species){
+  ##all available pathways for the species
+  pathways <-keggLink("pathway", species)
+  tot_path<-unique(pathways)
+
+  ##formating the dat into a list object
+  ##key= pathway ID, value = genes of the pathway in the kegg format
+  pathways_list <- sapply(tot_path, function(pathway) names(which(pathways==pathway)))
+  return (pathways_list)
+}
+
+get_list_from_cp <-function(list){
+  list = strsplit(list, "[ \t\n]+")[[1]]
+  list = gsub("[[:blank:]]|\u00A0|NA","",list)
+  list = list[which(!is.na(list[list != ""]))]    #remove empty entry
+  list = unique(gsub("-.+", "", list))  #Remove isoform accession number (e.g. "-2")
+  return(list)
+}
+
+geneID_to_kegg <- function(vector,species){
+  vector <- sapply(vector, function(x) paste(species,x,sep=":"),USE.NAMES = F)
+  return (vector)
+}
+
+to_keggID <- function(id_list,id_type){
+  if (id_type == "ncbi-geneid") {
+    id_list <-  unique(geneID_to_kegg(id_list,args$species))
+  } else if (id_type=="uniprot"){
+    id_list <- unique(sapply(id_list, function(x) paste(id_type,":",x,sep=""),USE.NAMES = F))
+    if (length(id_list)>250){
+      id_list <- split(id_list, ceiling(seq_along(id_list)/250))
+      id_list <- sapply(id_list, function(x) keggConv("genes",x))
+      id_list <- unique(unlist(id_list))
+    } else {
+      id_list <- unique(keggConv("genes", id_list))
+    }
+  } else if (id_type=="kegg-id") {
+    id_list <- unique(id_list)
+  }
+  return (id_list)
+}
+
+#take data frame, return  data frame
+split_ids_per_line <- function(line,ncol){
+
+  #print (line)
+  header = colnames(line)
+  line[ncol] = gsub("[[:blank:]]|\u00A0","",line[ncol])
+
+  if (length(unlist(strsplit(as.character(line[ncol]),";")))>1) {
+    if (length(line)==1 ) {
+      lines = as.data.frame(unlist(strsplit(as.character(line[ncol]),";")),stringsAsFactors = F)
+    } else {
+      if (ncol==1) {                                #first column
+        lines = suppressWarnings(cbind(unlist(strsplit(as.character(line[ncol]),";")), line[2:length(line)]))
+      } else if (ncol==length(line)) {                 #last column
+        lines = suppressWarnings(cbind(line[1:ncol-1],unlist(strsplit(as.character(line[ncol]),";"))))
+      } else {
+        lines = suppressWarnings(cbind(line[1:ncol-1], unlist(strsplit(as.character(line[ncol]),";"),use.names = F), line[(ncol+1):length(line)]))
+      }
+    }
+    colnames(lines)=header
+    return(lines)
+  } else {
+    return(line)
+  }
+}
+
+#create new lines if there's more than one id per cell in the columns in order to have only one id per line
+one_id_one_line <-function(tab,ncol){
+
+  if (ncol(tab)>1){
+
+    tab[,ncol] = sapply(tab[,ncol],function(x) gsub("[[:blank:]]","",x))
+    header=colnames(tab)
+    res=as.data.frame(matrix(ncol=ncol(tab),nrow=0))
+    for (i in 1:nrow(tab) ) {
+      lines = split_ids_per_line(tab[i,],ncol)
+      res = rbind(res,lines)
+    }
+  }else {
+    res = unlist(sapply(tab[,1],function(x) strsplit(x,";")),use.names = F)
+    res = data.frame(res[which(!is.na(res[res!=""]))],stringsAsFactors = F)
+    colnames(res)=colnames(tab)
+  }
+  return(res)
+}
+
+kegg_mapping<- function(kegg_id_list,id_type,ref_ids) {
+
+    #mapping
+    map<-lapply(ref_ids, is.element, unique(kegg_id_list))
+    names(map) <- sapply(names(map), function(x) gsub("path:","",x),USE.NAMES = FALSE)    #remove the prefix "path:"
+
+    in.path<-sapply(map, function(x) length(which(x==TRUE)))
+    tot.path<-sapply(map, length)
+
+    ratio <- (as.numeric(in.path[which(in.path!=0)])) / (as.numeric(tot.path[which(in.path!=0)]))
+    ratio <- as.numeric(format(round(ratio*100, 2), nsmall = 2))
+
+    ##useful but LONG
+    ## to do before : in step 1
+    path.names<-names(in.path[which(in.path!=0)])
+    name <- sapply(path.names, function(x) keggGet(x)[[1]]$NAME,USE.NAMES = FALSE)
+
+    res<-data.frame(I(names(in.path[which(in.path!=0)])), I(name), ratio, as.numeric(in.path[which(in.path!=0)]), as.numeric(tot.path[which(in.path!=0)]))
+    res <- res[order(as.numeric(res[,3]),decreasing = TRUE),]
+    colnames(res)<-c("pathway_ID", "Description" , "Ratio IDs mapped/total IDs (%)" ,"nb KEGG genes IDs mapped in the pathway", "nb total of KEGG genes IDs present in the pathway")
+
+    return(res)
+
+}
+
+#get args from command line
+args <- get_args()
+
+#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/kegg_identification/args.Rda")
+#load("/home/dchristiany/proteore_project/ProteoRE/tools/kegg_identification/args.Rda")
+
+###setting variables
+header = str2bool(args$header)
+if (!is.null(args$id_list)) {id_list <- get_list_from_cp(args$id_list)}           #get ids from copy/paste input
+if (!is.null(args$input)) {                                                       #get ids from input file
+  csv <- read_file(args$input,header)
+  ncol <- as.numeric(gsub("c", "" ,args$id_column))
+  csv <- one_id_one_line(csv,ncol)
+  id_list <- as.vector(csv[,ncol])
+  id_list <- unique(id_list[which(!is.na(id_list[id_list!=""]))])
+}
+
+#convert to keggID if needed
+id_list <- to_keggID(id_list,args$id_type)
+
+#get pathways of species with associated KEGG ID genes
+pathways_list <- get_pathways_list(args$species)
+
+#mapping on pathways
+res <- kegg_mapping(id_list,args$id_type,pathways_list)
+if (nrow(res) > as.numeric(args$nb_pathways)) { res <- res[1:args$nb_pathways,] }
+
+write.table(res, file=args$output, quote=FALSE, sep='\t',row.names = FALSE, col.names = TRUE)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kegg_identification.xml	Tue Dec 18 10:00:40 2018 -0500
@@ -0,0 +1,155 @@
+<tool id="kegg_identification" name="KEGG pathways identification" version="2018.12.12">
+    <description>and coverage</description>
+    <requirements>
+        <requirement type="package" version="1.18.0">bioconductor-keggrest</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript $__tool_directory__/kegg_identification.R
+
+        #if $input.ids == "text"
+            --id_list="$input.txt"
+        #else
+            --input="$input.file"
+            --id_column="$input.ncol"
+            --header="$input.header"
+        #end if
+
+        --id_type="$id_type"
+        --output="$output1"
+        --nb_pathways="$nb_pathways"
+        --species="$species"
+
+    ]]></command>
+    <inputs>
+        <conditional name="input" >
+            <param name="ids" type="select" label="Enter your IDs (Entrez gene IDs, KEGG gene IDs or UniProt Accession number)" help="Copy/paste or from a file (e.g. table)" >
+                <option value="text">Copy/paste your IDs</option>
+                <option value="file" selected="true">Input file containing your IDs</option>
+            </param>
+            <when value="text" >
+                <param name="txt" type="text" label="Copy/paste your IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: hsa:3306 hsa:22948' >
+                    <sanitizer invalid_char=''>
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="__sq__"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+            </when>
+            <when value="file" >
+                <param name="file" type="data" format="txt,tabular,tsv" label="Select your file" help="" />
+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+                <param name="ncol" type="text" value="c1" label="Column number of IDs" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />
+            </when>
+        </conditional>
+        <param name="id_type" type="select" label="Select IDs" >
+            <option value="ncbi-geneid" selected="true">Entrez Gene ID</option>
+            <option value="kegg-id">KEGG gene ID</option>
+            <option value="uniprot">UniProt Accession number</option>
+
+        </param>
+        <param name="species" type="select" label="Species">
+            <option value="hsa">Human (Homo sapiens)</option>
+            <option value="mmu">Mouse (Mus musculus)</option>
+            <option value="rno">Rat (Rattus norvegicus)</option>
+        </param>
+        <param type="integer" name="nb_pathways" label="Set number of pathways to be displayed" value="10" help="Pathways are sorted by percent of mapping gene by pathway in descending order"/>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tsv" />
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="input" >
+                <param name="ids" value="file"/>
+                <param name="file" value="SPZ.soluble_kegg_id.txt" />
+                <param name="header" value="true" />
+                <param name="ncol" value="c2" />
+            </conditional>
+            <param name="id_type" value="kegg-id" />
+            <param name="species" value="hsa" />
+            <param name="nb_pathways" value="20" />
+            <output name="output1" value="test.tsv" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**Description**
+
+This tool maps a list of identifiers to all KEGG pathways and returns a list of KEGG pathways sorted by the percentage of coverage for each pathway. This is done by computing the number of KEGG genes IDs from your list divided by the total number of KEGG genes ID of the selected species for each KEGG pathway.
+
+It allows you to directly identify KEGG pathways that are the most covered by your gene/protein list.
+
+-----
+
+**Input**
+
+Input can be either a list of IDs or a file containing multiple columns but with at least one column of IDs (e.g. hsa:04970).
+Input IDs type can be Entrez gene ID (e.g. 2243), KEGG gene ID (e.g. hsa:2243 in homo sapiens) or Uniprot accession number (e.g. P02671).
+
+.. class:: warningmark
+
+If you use Uniprot accession number, it will be converted to KEGG gene ID; this step can be time consuming.
+
+If your input file contains other type of ID, please use the ID_Converter tool.
+
+-----
+
+**Parameters**
+
+"Set number of pathways to be displayed": by default the number of pathway to be displayed is set to 10 and sorted in descending order.
+
+"Species": three species are supported: Human (Homo sapiens), Mouse (Mus musculus) and Rat (Rattus norvegicus).
+
+-----
+
+**Output:**
+
+The output is a tabular file (.tsv) with the following columns:
+
+* **Pathway_ID:** KEGG pathway identifier (e.g. hsa04970)
+* **Description:** name of the pathway as in KEGG
+* **Ratio IDs mapped/total IDs (%):** pathway coverage in percentage
+* **nb KEGG genes IDs mapped in the pathway:** number of KEGG genes from your list mapped to the KEGG pathway
+* **nb total of KEGG genes IDs present in the pathway:** total number of KEGG genes present in the KEGG pathway
+
+-----
+
+.. class:: infomark
+
+**Data source (release date)**
+
+Pathways and KEGG genes IDs are collected via KEGGREST package.
+
+User manual / Documentation: KEGGREST Bioconductor package https://bioconductor.org/packages/3.1/bioc/html/KEGGREST.html
+
+-----
+
+.. class:: infomark
+
+**Galaxy integration**
+
+David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubKEGGREST,
+  title = {KEGGREST: Client-side REST access to KEGG},
+  author = {Dan Tenenbaum},
+  year = {2018},
+  note = {R package version 1.18.1},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/Bioconductor/KEGGREST},
+}</citation>
+    </citations>
+</tool>
--- a/test-data/SPZ.soluble.txt	Wed Sep 19 05:38:52 2018 -0400
+++ b/test-data/SPZ.soluble.txt	Tue Dec 18 10:00:40 2018 -0500
@@ -118,16 +118,13 @@
 Q8WXX0
 P13639
 Q14697
-P55809
-A0AVT1
+P55809;A0AVT1
 O14980
 Q9BVA1
 Q14697
 O95202
 O75694
-Q16851
-P26640
-P23368
+Q16851;P26640;P23368
 P55084
 P17174
 P07814
Binary file tool-data/l.hsa.gene.RData has changed
Binary file tool-data/l.hsa.up.RData has changed
Binary file tool-data/l.mmu.gene.RData has changed
Binary file tool-data/l.mmu.up.RData has changed
--- a/tool_data_table_conf.xml.sample	Wed Sep 19 05:38:52 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-<tables>
-    <!-- location uniprot_kegg_list for find pathways tool-->
-    <table name='uniprot_kegg_list' comment_char="#">
-        <columns>name,date,organism,value</columns>
-        <file path="tool-data/uniprot_kegg_list.loc"/>
-    </table>
-    <!-- location entrez_kegg_list for find pathways tool-->
-    <table name='entrez_kegg_list' comment_char="#">
-        <columns>name,date,organism,value</columns>
-        <file path="tool-data/entrez_kegg_list.loc"/>
-    </table>
-</tables>
--- a/uniprot_kegg_list.loc.sample	Wed Sep 19 05:38:52 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-#name	date	organism	value(path)
-Human (Homo sapiens)	27-07-18	hsa	tool-data/l.hsa.up.RData
-Mouse (Mus musculus)	27-07-18	mmu	tool-data/l.mmu.up.RData