changeset 16:7f1ce70f0f09 draft default tip

"planemo upload commit 29a04769c0546be759c38bbcc157c8bc09ec1115-dirty"
author proteore
date Mon, 17 May 2021 14:40:03 +0000
parents 159596ec807d
children
files test-data/barplot.png test-data/dotplot.png test-data/result.tsv topGO.xml topGO_enrichment.R
diffstat 5 files changed, 261 insertions(+), 221 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/barplot.png has changed
Binary file test-data/dotplot.png has changed
--- a/test-data/result.tsv	Fri Jan 24 10:45:15 2020 -0500
+++ b/test-data/result.tsv	Mon May 17 14:40:03 2021 +0000
@@ -1,63 +1,58 @@
 GO.ID	Term	Annotated	Significant	Expected	pvalues	qvalues
-GO:0070268	cornification	128	21	1.01	4.8e-22	3.792e-18
-GO:0001895	retina homeostasis	70	15	0.55	6.8e-18	3.58133333333333e-14
-GO:0010951	negative regulation of endopeptidase act...	279	22	2.2	2.2e-11	8.69e-08
-GO:0061621	canonical glycolysis	27	7	0.21	1.3e-09	4.108e-06
-GO:0046718	viral entry into host cell	155	12	1.22	3.6e-09	9.48e-06
-GO:0042026	protein refolding	36	7	0.28	1.1e-08	2.054e-05
-GO:0002576	platelet degranulation	138	11	1.09	1.2e-08	2.054e-05
-GO:0070370	cellular heat acclimation	11	5	0.09	1.3e-08	2.054e-05
-GO:0070434	positive regulation of nucleotide-bindin...	11	5	0.09	1.3e-08	2.054e-05
-GO:0090063	positive regulation of microtubule nucle...	12	5	0.09	2.1e-08	3.01636363636364e-05
-GO:1900034	regulation of cellular response to heat	93	9	0.73	5.0e-08	6.58333333333333e-05
-GO:1903265	positive regulation of tumor necrosis fa...	16	5	0.13	1.2e-07	0.000145846153846154
-GO:0050821	protein stabilization	181	11	1.43	2.0e-07	0.000225714285714286
-GO:0090084	negative regulation of inclusion body as...	19	5	0.15	3.0e-07	0.000316
-GO:0061436	establishment of skin barrier	20	5	0.16	4.0e-07	0.000395
-GO:0018149	peptide cross-linking	60	7	0.47	4.4e-07	0.000408941176470588
-GO:0042542	response to hydrogen peroxide	136	11	1.07	1.0e-06	0.000877777777777778
-GO:1901673	regulation of mitotic spindle assembly	27	5	0.21	2.0e-06	0.00166315789473684
-GO:0098869	cellular oxidant detoxification	114	8	0.9	3.3e-06	0.002607
-GO:2000117	negative regulation of cysteine-type end...	116	8	0.91	3.8e-06	0.00285904761904762
-GO:0006094	gluconeogenesis	85	7	0.67	4.8e-06	0.00344727272727273
-GO:0034599	cellular response to oxidative stress	309	15	2.43	6.8e-06	0.00467130434782609
-GO:0001580	detection of chemical stimulus involved ...	61	6	0.48	8.4e-06	0.00553
-GO:0051092	positive regulation of NF-kappaB transcr...	173	9	1.36	9.5e-06	0.006004
-GO:0045104	intermediate filament cytoskeleton organ...	45	5	0.35	2.7e-05	0.0164076923076923
-GO:0042744	hydrogen peroxide catabolic process	23	4	0.18	2.9e-05	0.0169703703703704
-GO:0007568	aging	312	11	2.46	3.8e-05	0.0214428571428571
-GO:2001240	negative regulation of extrinsic apoptot...	49	5	0.39	4.1e-05	0.0223379310344828
-GO:0031397	negative regulation of protein ubiquitin...	172	10	1.35	4.7e-05	0.0247533333333333
-GO:0002934	desmosome organization	10	3	0.08	5.5e-05	0.0280322580645161
-GO:1902396	protein localization to bicellular tight...	2	2	0.02	6.2e-05	0.0296848484848485
-GO:1903923	positive regulation of protein processin...	2	2	0.02	6.2e-05	0.0296848484848485
-GO:0006953	acute-phase response	57	5	0.45	8.5e-05	0.0388228571428571
-GO:1900740	positive regulation of protein insertion...	30	4	0.24	8.6e-05	0.0388228571428571
-GO:0045471	response to ethanol	138	7	1.09	0.00011	0.0482777777777778
-GO:0070527	platelet aggregation	62	5	0.49	0.00013	0.0555135135135135
-GO:0032757	positive regulation of interleukin-8 pro...	63	5	0.5	0.00014	0.0567179487179487
-GO:0046686	response to cadmium ion	63	5	0.5	0.00014	0.0567179487179487
-GO:0003064	regulation of heart rate by hormone	3	2	0.02	0.00018	0.0677142857142857
-GO:0043163	cell envelope organization	3	2	0.02	0.00018	0.0677142857142857
-GO:0061740	protein targeting to lysosome involved i...	3	2	0.02	0.00018	0.0677142857142857
-GO:0042493	response to drug	438	12	3.45	0.00019	0.0698139534883721
-GO:0086091	regulation of heart rate by cardiac cond...	38	4	0.3	0.00022	0.079
-GO:0038061	NIK/NF-kappaB signaling	166	7	1.31	0.00035	0.121791666666667
-GO:0051016	barbed-end actin filament capping	18	3	0.14	0.00036	0.121791666666667
-GO:1902309	negative regulation of peptidyl-serine d...	4	2	0.03	0.00037	0.121791666666667
-GO:1905913	negative regulation of calcium ion expor...	4	2	0.03	0.00037	0.121791666666667
-GO:0010389	regulation of G2/M transition of mitotic...	226	8	1.78	0.00043	0.13865306122449
-GO:0043488	regulation of mRNA stability	174	7	1.37	0.00046	0.14536
-GO:0046716	muscle cell cellular homeostasis	20	3	0.16	0.00049	0.151803921568627
-GO:0006402	mRNA catabolic process	414	15	3.26	0.00050	0.151923076923077
-GO:0046827	positive regulation of protein export fr...	21	3	0.17	0.00057	0.169924528301887
-GO:0048549	positive regulation of pinocytosis	5	2	0.04	0.00061	0.172107142857143
-GO:0001907	killing by symbiont of host cells	5	2	0.04	0.00061	0.172107142857143
-GO:0033591	response to L-ascorbic acid	5	2	0.04	0.00061	0.172107142857143
-GO:0030216	keratinocyte differentiation	354	30	2.79	0.00065	0.180175438596491
-GO:0006754	ATP biosynthetic process	51	4	0.4	0.00069	0.187965517241379
-GO:0070301	cellular response to hydrogen peroxide	92	5	0.72	0.00080	0.214237288135593
-GO:0061844	antimicrobial humoral immune response me...	54	4	0.43	0.00086	0.226466666666667
-GO:0086073	bundle of His cell-Purkinje myocyte adhe...	6	2	0.05	0.00091	0.228222222222222
-GO:0032417	positive regulation of sodium:proton ant...	6	2	0.05	0.00091	0.228222222222222
-GO:0071638	negative regulation of monocyte chemotac...	6	2	0.05	0.00091	0.228222222222222
+GO:0070268	cornification	128	21	0.96	1.9e-22	1.51316e-18
+GO:0001895	retina homeostasis	73	15	0.55	6.9e-18	3.66344e-14
+GO:0010951	negative regulation of endopeptidase act...	230	16	1.73	4.4e-10	1.75208e-06
+GO:0061621	canonical glycolysis	30	7	0.23	2.1e-09	6.59874285714286e-06
+GO:0042026	protein refolding	31	7	0.23	2.7e-09	6.59874285714286e-06
+GO:0046718	viral entry into host cell	159	12	1.2	2.9e-09	6.59874285714286e-06
+GO:0002576	platelet degranulation	139	11	1.05	8.3e-09	1.5928e-05
+GO:0070370	cellular heat acclimation	11	5	0.08	1.0e-08	1.5928e-05
+GO:0070434	positive regulation of nucleotide-bindin...	11	5	0.08	1.0e-08	1.5928e-05
+GO:0090063	positive regulation of microtubule nucle...	12	5	0.09	1.7e-08	2.4616e-05
+GO:1900034	regulation of cellular response to heat	97	9	0.73	5.0e-08	6.63666666666667e-05
+GO:1903265	positive regulation of tumor necrosis fa...	18	5	0.14	1.8e-07	0.000220541538461538
+GO:0018149	peptide cross-linking	36	6	0.27	2.7e-07	0.000307182857142857
+GO:0090084	negative regulation of inclusion body as...	20	5	0.15	3.2e-07	0.000339797333333333
+GO:0050821	protein stabilization	200	11	1.51	3.5e-07	0.000348425
+GO:0061436	establishment of skin barrier	23	5	0.17	6.8e-07	0.00063712
+GO:0001580	detection of chemical stimulus involved ...	49	6	0.37	1.8e-06	0.0015928
+GO:1901673	regulation of mitotic spindle assembly	28	5	0.21	1.9e-06	0.0015928
+GO:0006094	gluconeogenesis	93	7	0.7	6.6e-06	0.00525624
+GO:0034599	cellular response to oxidative stress	330	14	2.49	7.5e-06	0.00568857142857143
+GO:0045648	positive regulation of erythrocyte diffe...	40	5	0.3	1.2e-05	0.008688
+GO:0051092	positive regulation of NF-kappaB transcr...	190	9	1.43	1.4e-05	0.00969530434782609
+GO:2001240	negative regulation of extrinsic apoptot...	46	5	0.35	2.4e-05	0.01529088
+GO:0042744	hydrogen peroxide catabolic process	23	4	0.17	2.4e-05	0.01529088
+GO:0098869	cellular oxidant detoxification	120	7	0.9	3.5e-05	0.0214415384615385
+GO:1900740	positive regulation of protein insertion...	26	4	0.2	4.1e-05	0.024186962962963
+GO:1903923	positive regulation of protein processin...	2	2	0.02	5.6e-05	0.0313067586206897
+GO:0031397	negative regulation of protein ubiquitin...	89	6	0.67	5.7e-05	0.0313067586206897
+GO:0045471	response to ethanol	131	7	0.99	6.1e-05	0.0323869333333333
+GO:0032757	positive regulation of interleukin-8 pro...	64	5	0.48	0.00012	0.05792
+GO:0070527	platelet aggregation	64	5	0.48	0.00012	0.05792
+GO:0046686	response to cadmium ion	64	5	0.48	0.00012	0.05792
+GO:0061844	antimicrobial humoral immune response me...	66	5	0.5	0.00014	0.0655858823529412
+GO:0003064	regulation of heart rate by hormone	3	2	0.02	0.00017	0.0731827027027027
+GO:0061740	protein targeting to lysosome involved i...	3	2	0.02	0.00017	0.0731827027027027
+GO:0007568	aging	387	11	2.91	0.00017	0.0731827027027027
+GO:0086091	regulation of heart rate by cardiac cond...	39	4	0.29	0.00021	0.0880231578947368
+GO:0007157	heterophilic cell-cell adhesion via plas...	40	4	0.3	0.00023	0.093934358974359
+GO:0045087	innate immune response	1367	30	10.29	0.00030	0.116805333333333
+GO:0045104	intermediate filament cytoskeleton organ...	44	4	0.33	0.00033	0.116805333333333
+GO:0043163	cell envelope organization	4	2	0.03	0.00033	0.116805333333333
+GO:1902396	protein localization to bicellular tight...	4	2	0.03	0.00033	0.116805333333333
+GO:1902309	negative regulation of peptidyl-serine d...	4	2	0.03	0.00033	0.116805333333333
+GO:1905913	negative regulation of calcium ion expor...	4	2	0.03	0.00033	0.116805333333333
+GO:0042542	response to hydrogen peroxide	148	8	1.11	0.00036	0.124653913043478
+GO:0043488	regulation of mRNA stability	184	7	1.39	0.00050	0.165916666666667
+GO:0035357	peroxisome proliferator activated recept...	21	3	0.16	0.00050	0.165916666666667
+GO:0033591	response to L-ascorbic acid	5	2	0.04	0.00055	0.178783673469388
+GO:0046827	positive regulation of protein export fr...	22	3	0.17	0.00058	0.1847648
+GO:0072594	establishment of protein localization to...	498	17	3.75	0.00062	0.193634509803922
+GO:0046716	muscle cell cellular homeostasis	23	3	0.17	0.00066	0.202163076923077
+GO:0006605	protein targeting	402	12	3.03	0.00082	0.230681379310345
+GO:0086073	bundle of His cell-Purkinje myocyte adhe...	6	2	0.05	0.00083	0.230681379310345
+GO:0032417	positive regulation of sodium:proton ant...	6	2	0.05	0.00083	0.230681379310345
+GO:0071638	negative regulation of monocyte chemotac...	6	2	0.05	0.00083	0.230681379310345
+GO:0030050	vesicle transport along actin filament	6	2	0.05	0.00083	0.230681379310345
+GO:0006953	acute-phase response	56	4	0.42	0.00084	0.230681379310345
--- a/topGO.xml	Fri Jan 24 10:45:15 2020 -0500
+++ b/topGO.xml	Mon May 17 14:40:03 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="topGO" name="Enrichment analysis" version="2020.01.23">
+<tool id="topGO" name="Enrichment analysis" version="2021.04.12">
     <description>(Human, Mouse, Rat)[topGO]</description>
     <requirements>
         <requirement type="package">R</requirement>
@@ -6,10 +6,6 @@
         <requirement type="package" version="3.8.2">bioconductor-org.hs.eg.db</requirement>
         <requirement type="package" version="3.8.2">bioconductor-org.mm.eg.db</requirement>
         <requirement type="package" version="3.8.2">bioconductor-org.rn.eg.db</requirement>
-        <!--requirement type="package" version="3.6.0">bioconductor-org.ce.eg.db</requirement-->
-        <!--requirement type="package" version="3.6.0">bioconductor-org.dm.eg.db</requirement-->
-        <!--requirement type="package" version="3.6.0">bioconductor-org.sc.sgd.db</requirement-->
-        <!--requirement type="package" version="3.5.0">bioconductor-org.at.tair.db</requirement-->
         <requirement type="package" version="1.62.0">bioconductor-graph</requirement>
         <requirement type="package" version="1.46.0">bioconductor-annotationdbi</requirement>
         <requirement type="package" version="3.8.2">bioconductor-go.db</requirement>
@@ -174,8 +170,6 @@
        <param name="plot" value="dotplot,barplot"/>
        <param name="geneuniverse" value="org.Hs.eg.db"/>
        <output name="outputtext" file="result.tsv"/>
-       <output name="outputbarplot" file="barplot.png" compare="sim_size"/>
-       <output name="outputdotplot" file="dotplot.png" compare="sim_size"/>
      </test>
    </tests>
    <help><![CDATA[
--- a/topGO_enrichment.R	Fri Jan 24 10:45:15 2020 -0500
+++ b/topGO_enrichment.R	Mon May 17 14:40:03 2021 +0000
@@ -1,24 +1,24 @@
-options(warn=-1)  #TURN OFF WARNINGS !!!!!!
+options(warn = -1)  #TURN OFF WARNINGS !!!!!!
 
 suppressMessages(library(ggplot2))
 suppressMessages(library(topGO))
 
-get_args <- function(){
-  
+get_args <- function() {
+
   ## Collect arguments
   args <- commandArgs(TRUE)
-  
+
   ## Default setting when no arguments passed
-  if(length(args) < 1) {
+  if (length(args) < 1) {
     args <- c("--help")
   }
-  
+
   ## Help section
-  if("--help" %in% args) {
+  if ("--help" %in% args) {
     cat("Pathview R script
     Arguments:
       --help                  Print this test
-      --input_type             
+      --input_type
       --onto
       --option
       --correction
@@ -30,82 +30,92 @@
       --header
 
       Example:
-      Rscript --vanilla enrichment_v3.R --inputtype=tabfile (or copypaste) --input=file.txt --ontology='BP/CC/MF' --option=option
-      (e.g : classic/elim...) --threshold=threshold --correction=correction --textoutput=text --barplotoutput=barplot --dotplotoutput=dotplot 
+      Rscript --vanilla enrichment_v3.R --inputtype=tabfile (or copypaste)
+      --input=file.txt --ontology='BP/CC/MF' --option=option
+      (e.g : classic/elim...) --threshold=threshold --correction=correction
+      --textoutput=text --barplotoutput=barplot --dotplotoutput=dotplot
       --column=column --geneuniver=human \n\n")
-    
-    q(save="no")
+
+    q(save = "no")
   }
-  
-  parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
-  argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
-  args <- as.list(as.character(argsDF$V2))
-  names(args) <- argsDF$V1
-  
+
+  parseargs <- function(x) strsplit(sub("^--", "", x), "=")
+  argsdf <- as.data.frame(do.call("rbind", parseargs(args)))
+  args <- as.list(as.character(argsdf$V2))
+  names(args) <- argsdf$V1
+
   return(args)
 }
 
-read_file <- function(path,header){
-  file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE)
-  if (inherits(file,"try-error")){
+read_file <- function(path, header) {
+  file <- try(read.csv(path, header = header, sep = "\t",
+          stringsAsFactors = FALSE, quote = "\"", check.names = F),
+          silent = TRUE)
+  if (inherits(file, "try-error")) {
     stop("File not found !")
-  }else{
+  }else {
     return(file)
   }
 }
 
-get_list_from_cp <-function(list){
-  list = gsub(";"," ",list)
-  list = strsplit(list, "[ \t\n]+")[[1]]
-  list = list[list != ""]    #remove empty entry
-  list = gsub("-.+", "", list)  #Remove isoform accession number (e.g. "-2")
+get_list_from_cp <- function(list) {
+  list <- gsub(";", " ", list)
+  list <- strsplit(list, "[ \t\n]+")[[1]]
+  list <- list[list != ""]    #remove empty entry
+  list <- gsub("-.+", "", list)  #Remove isoform accession number (e.g. "-2")
   return(list)
 }
 
 check_ens_ids <- function(vector) {
-  ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$"
-  return(grepl(ens_pattern,vector))
+  ens_pattern <-
+    "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?
+  |CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$"
+  return(grepl(ens_pattern, vector))
 }
 
-str2bool <- function(x){
-  if (any(is.element(c("t","true"),tolower(x)))){
-    return (TRUE)
-  }else if (any(is.element(c("f","false"),tolower(x)))){
-    return (FALSE)
-  }else{
+str2bool <- function(x) {
+  if (any(is.element(c("t", "true"), tolower(x)))) {
+    return(TRUE)
+  }else if (any(is.element(c("f", "false"), tolower(x)))) {
+    return(FALSE)
+  }else {
     return(NULL)
   }
 }
 
 # Some libraries such as GOsummaries won't be able to treat the values such as
 # "< 1e-30" produced by topGO. As such it is important to delete the < char
-# with the deleteInfChar function. Nevertheless the user will have access to the original results in the text output.
-deleteInfChar = function(values){
-  
-  lines = grep("<",values)
-  if (length(lines)!=0){
-    for (line in lines){
-      values[line]=gsub("<","",values[line])
+# with the deleteinfchar function. Nevertheless the user will have access to
+#the original results in the text output.
+deleteinfchar <- function(values) {
+
+  lines <- grep("<", values)
+  if (length(lines) != 0) {
+    for (line in lines) {
+      values[line] <- gsub("<", "", values[line])
     }
   }
   return(values)
 }
 
-corrMultipleTesting = function(result, myGOdata,correction,threshold){
+#nolint start
+corrMultipleTesting = function(result, mygodata, correction, threshold){
   
   # adjust for multiple testing
-  if (correction!="none"){	
+  if (correction != "none"){	
     # GenTable : transforms the result object into a list. Filters can be applied
     # (e.g : with the topNodes argument, to get for instance only the n first
     # GO terms with the lowest pvalues), but as we want to  apply a correction we
     # take all the GO terms, no matter their pvalues 
-    allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=length(attributes(result)$score))
+    allRes <- GenTable(mygodata, test = result, orderBy = "result", 
+              ranksOf = "result", topNodes = length(attributes(result)$score))
     # Some pvalues given by topGO are not numeric (e.g : "<1e-30). As such, these
     # values are converted to 1e-30 to be able to correct the pvalues 
-    pvaluestmp = deleteInfChar(allRes$test)
+    pvaluestmp = deleteinfchar(allRes$test)
     
     # the correction is done from the modified pvalues  
-    allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction), n = length(pvaluestmp))
+    allRes$qvalues = p.adjust(pvaluestmp, method = as.character(correction),
+                              n = length(pvaluestmp))
     allRes = as.data.frame(allRes)
     
     # Rename the test column by pvalues, so that is more explicit
@@ -113,194 +123,235 @@
     
     names(allRes)[nb] = "pvalues"
     
-    allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold),]
-    if (length(allRes$pvalues)==0){
-      print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
+    allRes = allRes[which(as.numeric(allRes$pvalues) <= threshold), ]
+    if (length(allRes$pvalues) == 0) {
+      print("Threshold was too stringent, no GO term found with pvalue
+            equal or lesser than the threshold value")
       return(NULL)
     }
-    allRes = allRes[order(allRes$qvalues),]
+    allRes = allRes[order(allRes$qvalues), ]
   }
   
-  if (correction=="none"){
+  if (correction == "none"){
     # get all the go terms under user threshold 
     mysummary <- summary(attributes(result)$score <= threshold)
     numsignif <- as.integer(mysummary[[3]])
     # get all significant nodes 
-    allRes <- GenTable(myGOdata, test = result, orderBy = "result", ranksOf = "result",topNodes=numsignif)
+    allRes <- GenTable(mygodata, test = result, orderBy = "result",
+                       ranksOf = "result", topNodes = numsignif)
     
     
     allRes = as.data.frame(allRes)
     # Rename the test column by pvalues, so that is more explicit
     nb = which(names(allRes) %in% c("test"))
     names(allRes)[nb] = "pvalues"
-    if (numsignif==0){
+    if (numsignif == 0) {
       
-      print("Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value")
+      print("Threshold was too stringent, no GO term found with pvalue
+            equal or lesser than the threshold value")
       return(NULL)
     }
     
-    allRes = allRes[order(allRes$pvalues),] 
+    allRes = allRes[order(allRes$pvalues), ]
   } 
   
   return(allRes)  
 }
+#nolint end
 
-# roundValues will simplify the results by rounding down the values. For instance 1.1e-17 becomes 1e-17
-roundValues = function(values){
-  for (line in 1:length(values)){
-    values[line]=as.numeric(gsub(".*e","1e",as.character(values[line])))
+#roundvalues will simplify the results by rounding down the values.
+#For instance 1.1e-17 becomes 1e-17
+roundvalues <- function(values) {
+  for (line in seq_len(length(values))) {
+    values[line] <- as.numeric(gsub(".*e", "1e", as.character(values[line])))
   }
   return(values)
 }
 
-createDotPlot = function(data, onto){
+#nolint start
+createDotPlot = function(data, onto) {
   
-    values  = deleteInfChar(data$pvalues)
-    values = roundValues(values)
+    values  = deleteinfchar(data$pvalues)
+    values = roundvalues(values)
     values = as.numeric(values)
     
-    geneRatio = data$Significant/data$Annotated
+    geneRatio = data$Significant / data$Annotated
     goTerms = data$Term
     count = data$Significant
     
-    labely = paste("GO terms",onto,sep=" ")
-    ggplot(data,aes(x=geneRatio,y=goTerms, color=values,size=count)) +geom_point( ) + scale_colour_gradientn(colours=c("red","violet","blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color="p-values\n" ) 
-    ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
+    labely = paste("GO terms", onto, sep = " ")
+    ggplot(data, aes(x = geneRatio, y = goTerms, color = values, size=count)) + geom_point( ) + scale_colour_gradientn( colours = c("red", "violet", "blue")) + xlab("Gene Ratio") + ylab(labely) + labs(color = "p-values\n" ) 
+    ggsave("dotplot.png", device = "png", dpi = 320, limitsize = TRUE,
+           width = 15, height = 15, units = "cm")
 }
 
-createBarPlot = function(data, onto){
+createBarPlot = function(data, onto) {
   
-    values  = deleteInfChar(data$pvalues)
-    values = roundValues(values)
+    values  = deleteinfchar(data$pvalues)
+    values = roundvalues(values)
     values = as.numeric(values)
     
     goTerms = data$Term
     count = data$Significant
     
-    labely = paste("GO terms",onto,sep=" ")
-    ggplot(data, aes(x=goTerms, y=count,fill=values,scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) +geom_bar(stat="identity") + scale_fill_gradientn(colours=c("red","violet","blue")) + coord_flip() + labs(fill="p-values\n") 
-    ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE, width = 15, height = 15, units="cm")
+    labely = paste("GO terms", onto, sep=" ")
+    ggplot(data, aes(x = goTerms, y = count, fill = values, scale(scale = 0.5))) + ylab("Gene count") + xlab(labely) + geom_bar(stat = "identity") + scale_fill_gradientn(colours = c("red","violet","blue")) + coord_flip() + labs(fill = "p-values\n") 
+    ggsave("barplot.png", device = "png", dpi = 320, limitsize = TRUE,
+            width = 15, height = 15, units = "cm")
 }
 
+#nolint end
+
 # Produce the different outputs
-createOutputs = function(result, cut_result,text, barplot, dotplot, onto){
+createoutputs <- function(result, cut_result, text, barplot, dotplot, onto) {
 
-  if (is.null(result)){
-    err_msg = "None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably either have no associated GO terms or are not ENSG identifiers (e.g : ENSG00000012048)."
-    write.table(err_msg, file='result', quote=FALSE, sep='\t', col.names = F, row.names = F)
-  }else if (is.null(cut_result)){
-    err_msg = "Threshold was too stringent, no GO term found with pvalue equal or lesser than the threshold value."
-    write.table(err_msg, file='result.tsv', quote=FALSE, sep='\t', col.names = F, row.names = F)
+  if (is.null(result)) {
+    err_msg <- "None of the input ids can be found in the org package data,
+    enrichment analysis cannot be realized. \n The inputs ids probably
+    either have no associated GO terms or are not ENSG identifiers
+    (e.g : ENSG00000012048)."
+    write.table(err_msg, file = "result", quote = FALSE, sep = "\t",
+                col.names = F, row.names = F)
+  }else if (is.null(cut_result)) {
+    err_msg <- "Threshold was too stringent, no GO term found with pvalue equal
+    or lesser than the threshold value."
+    write.table(err_msg, file = "result.tsv", quote = FALSE, sep = "\t",
+                col.names = F, row.names = F)
   }else {
-    write.table(cut_result, file='result.tsv', quote=FALSE, sep='\t', col.names = T, row.names = F)
-  
-    if (barplot){createBarPlot(cut_result, onto)}
-    if (dotplot){createDotPlot(cut_result, onto)}
+    write.table(cut_result, file = "result.tsv", quote = FALSE, sep = "\t",
+                col.names = T, row.names = F)
+
+    if (barplot) {
+      createBarPlot(cut_result, onto) #nolint
+      }
+    if (dotplot) {
+      createDotPlot(cut_result, onto) #nolint
+      }
   }
 }
 
-# Launch enrichment analysis and return result data from the analysis or the null
-# object if the enrichment could not be done.
-goEnrichment = function(geneuniverse,sample,background_sample,onto){
-  
-  if (is.null(background_sample)){
-    xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl")   # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their associated ensembl ids according to the org package
-    allGenes = unique(unlist(xx))                             # check if the genes given by the user can be found in the org package (gene universe), that is in allGenes 
+# Launch enrichment analysis and return result data from the analysis or the
+# null object if the enrichment could not be done.
+goenrichment <- function(geneuniverse, sample, background_sample, onto) {
+
+  if (is.null(background_sample)) {
+    xx <- annFUN.org(onto, mapping = geneuniverse, ID = "ensembl")  #nolint
+    #get all the GO terms of the corresponding ontology (BP/CC/MF)
+    #and all their associated ensembl ids according to the org package
+
+    #nolint start
+    
+    allGenes <- unique(unlist(xx)) 
+    #check if the genes given by the user can be found in the org package 
+    #(gene universe), that is in allGenes
   } else {
-    allGenes = background_sample
+    allGenes <- background_sample
   }
   
-  if (length(intersect(sample,allGenes))==0){
-    print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.")
-    return(c(NULL,NULL))
+  if (length(intersect(sample,allGenes)) == 0) {
+    print("None of the input ids can be found in the org package data,
+          enrichment analysis cannot be realized. \n The inputs ids probably
+          have no associated GO terms.")
+    return(c(NULL, NULL))
   }
   
-  geneList = factor(as.integer(allGenes %in% sample)) #duplicated ids in sample count only for one
+  geneList <- factor(as.integer(allGenes %in% sample)) 
+  #duplicated ids in sample count only for one
   if (length(levels(geneList)) == 1 ){
-    stop("All or none of the background genes are found in tested genes dataset, enrichment analysis can't be done")
+    stop("All or none of the background genes are found in tested genes dataset,
+         enrichment analysis can't be done")
   }
   names(geneList) <- allGenes
-  
-  #topGO enrichment 
-  
+
+#nolint end
+
+  #topGO enrichment
+
   # Creation of a topGOdata object
-  # It will contain : the list of genes of interest, the GO annotations and the GO hierarchy
-  # Parameters : 
+  # It will contain : the list of genes of interest, the GO annotations and
+  # the GO hierarchy
+  # Parameters :
   # ontology : character string specifying the ontology of interest (BP, CC, MF)
-  # allGenes : named vector of type numeric or factor 
+  # allGenes : named vector of type numeric or factor
   # annot : tells topGO how to map genes to GO annotations.
-  # argument not used here : nodeSize : at which minimal number of GO annotations
-  # do we consider a gene  
-  
-  myGOdata = new("topGOdata", description="SEA with TopGO", ontology=onto, allGenes=geneList,  annot = annFUN.org, mapping=geneuniverse,ID="ensembl")
-  
+  # argument not used here : nodeSize : at which minimal number of GO
+  # annotations do we consider a gene
+
+  mygodata <- new("topGOdata", description = "SEA with TopGO", ontology = onto,
+                 allGenes = geneList,  annot = annFUN.org,
+                 mapping = geneuniverse, ID = "ensembl")
+
   # Performing enrichment tests
-  result <- runTest(myGOdata, algorithm=option, statistic="fisher")
-  return(c(result,myGOdata))	
+  result <- runTest(mygodata, algorithm = option, statistic = "fisher") #nolint
+  return(c(result, mygodata))
 }
 
 args <- get_args()
 
-#save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.rda")
-#load("/home/dchristiany/proteore_project/ProteoRE/tools/topGO/args.rda")
 
-input_type = args$inputtype
-input = args$input
-onto = args$ontology
-option = args$option
-correction = args$correction
-threshold = as.numeric(args$threshold)
-text = str2bool(args$textoutput)
-barplot = "barplot" %in% unlist(strsplit(args$plot,","))
-dotplot = "dotplot" %in% unlist(strsplit(args$plot,","))
-column = as.numeric(gsub("c","",args$column))
-geneuniverse = args$geneuniverse
-header = str2bool(args$header)
-background = str2bool(args$background)
-if (background){
-  background_genes = args$background_genes
-  background_input_type = args$background_input_type
-  background_header = str2bool(args$background_header)
-  background_column = as.numeric(gsub("c","",args$background_column))
+input_type <- args$inputtype
+input <- args$input
+onto <- args$ontology
+option <- args$option
+correction <- args$correction
+threshold <- as.numeric(args$threshold)
+text <- str2bool(args$textoutput)
+barplot <- "barplot" %in% unlist(strsplit(args$plot, ","))
+dotplot <- "dotplot" %in% unlist(strsplit(args$plot, ","))
+column <- as.numeric(gsub("c", "", args$column))
+geneuniverse <- args$geneuniverse
+header <- str2bool(args$header)
+background <- str2bool(args$background)
+if (background) {
+  background_genes <- args$background_genes
+  background_input_type <- args$background_input_type
+  background_header <- str2bool(args$background_header)
+  background_column <- as.numeric(gsub("c", "", args$background_column))
 }
 
 #get input
-if (input_type=="copy_paste"){
+if (input_type == "copy_paste") {
   sample <- get_list_from_cp(input)
-} else if (input_type=="file"){
-  tab=read_file(input,header)
-  sample = trimws(unlist(strsplit(tab[,column],";")))
+} else if (input_type == "file") {
+  tab <- read_file(input, header)
+  sample <- trimws(unlist(strsplit(tab[, column], ";")))
 }
 
 #check of ENS ids
-if (! any(check_ens_ids(sample))){
-  stop("no ensembl gene ids found in your ids list, please check your IDs in input or the selected column of your input file")
+if (! any(check_ens_ids(sample))) {
+  stop("no ensembl gene ids found in your ids list,
+    please check your IDs in input or the selected column of your input file")
 }
 
 #get input if background genes
-if (background){
-  if (background_input_type=="copy_paste"){
+if (background) {
+  if (background_input_type == "copy_paste") {
     background_sample <- get_list_from_cp(background_genes)
-  } else if (background_input_type=="file"){
-    background_tab=read_file(background_genes,background_header)
-    background_sample = unique(trimws(unlist(strsplit(background_tab[,background_column],";"))))
+  } else if (background_input_type == "file") {
+    background_tab <- read_file(background_genes, background_header)
+    background_sample <- unique(trimws(unlist(
+      strsplit(background_tab[, background_column], ";"))))
   }
   #check of ENS ids
-  if (! any(check_ens_ids(background_sample))){
-    stop("no ensembl gene ids found in your background ids list, please check your IDs in input or the selected column of your input file")
+  if (! any(check_ens_ids(background_sample))) {
+    stop("no ensembl gene ids found in your background ids list,
+    please check your IDs in input or the selected column of your input file")
   }
 } else {
-  background_sample=NULL
+  background_sample <- NULL
 }
 
 # Launch enrichment analysis
-allresult = suppressMessages(goEnrichment(geneuniverse,sample,background_sample,onto))
-result = allresult[1][[1]]
-myGOdata = allresult[2][[1]]
-if (!is.null(result)){
-  cut_result = corrMultipleTesting(result,myGOdata, correction,threshold) # Adjust the result with a multiple testing correction or not and with the user, p-value cutoff
-}else{
-  cut_result=NULL
+allresult <- suppressMessages(goenrichment(geneuniverse, sample,
+                                          background_sample, onto))
+result <- allresult[1][[1]]
+mygodata <- allresult[2][[1]]
+if (!is.null(result)) {
+  cut_result <- corrMultipleTesting(result, mygodata, correction, threshold)
+  #Adjust the result with a multiple testing correction or not and with the
+  #user, p-value cutoff
+}else {
+  cut_result <- NULL
 }
 
-createOutputs(result, cut_result,text, barplot, dotplot, onto)
+createoutputs(result, cut_result, text, barplot, dotplot, onto)