annotate abstracts_by_pmids.R @ 0:02e46a96e98a draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
author iuc
date Wed, 24 Mar 2021 08:34:22 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
1 #!/usr/bin/env Rscript
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
2 #TOOL2 abstracts_by_pmids
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
3 #
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
4 #This tool retrieves for all PMIDs in each row of a table the according abstracts and saves them in additional columns.
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
5 #
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
6 #Input: Tab-delimited table with columns containing PMIDs. The names of the PMID columns should start with “PMID”, e.g. “PMID_1”, “PMID_2” etc.
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
7 #
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
8 #Output: Input table with additional columns containing abstracts corresponding to the PMIDs from PubMed.
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
9 #The abstract columns are called "ABSTRACT_1", "ABSTARCT_2" etc.
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
10 #
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
11 # Usage: $ T2_abstracts_by_pmid.R [-h] [-i INPUT] [-o OUTPUT]
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
12 #
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
13 # optional arguments:
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
14 # -h, --help show help message
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
15 # -i INPUT, --input INPUT input file name. add path if file is not in working directory
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
16 # -o OUTPUT, --output OUTPUT output file name. [default "T2_output"]
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
17
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
18
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
19 if ("--install_packages" %in% commandArgs()) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
20 print("Installing packages")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
21 if (!require("argparse")) install.packages("argparse", repo = "http://cran.rstudio.com/");
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
22 if (!require("reutils")) install.packages("reutils", repo = "http://cran.rstudio.com/");
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
23 if (!require("easyPubMed")) install.packages("easyPubMed", repo = "http://cran.rstudio.com/");
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
24 if (!require("textclean")) install.packages("textclean", repo = "http://cran.rstudio.com/");
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
25 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
26
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
27 suppressPackageStartupMessages(library("argparse"))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
28 library("reutils")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
29 suppressPackageStartupMessages(library("easyPubMed"))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
30 suppressPackageStartupMessages(library("textclean"))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
31
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
32 parser <- ArgumentParser()
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
33 parser$add_argument("-i", "--input",
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
34 help = "input fie name. add path if file is not in workind directory")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
35 parser$add_argument("-o", "--output", default = "abstracts_by_pmids_output",
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
36 help = "output file name. [default \"%(default)s\"]")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
37 parser$add_argument("--install_packages", action = "store_true", default = FALSE,
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
38 help = "If you want to auto install missing required packages.")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
39
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
40 args <- parser$parse_args()
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
41
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
42 data <- read.delim(args$input, stringsAsFactors = FALSE, header = TRUE, sep = "\t")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
43 pmids_cols_index <- grep("PMID", names(data))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
44
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
45 fetch_abstracts <- function(pmids, row) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
46
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
47 efetch_result <- NULL
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
48 try_num <- 1
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
49 t_0 <- Sys.time()
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
50
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
51 while (is.null(efetch_result)) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
52
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
53 # Timing check: kill at 3 min
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
54 if (try_num > 1) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
55 Sys.sleep(time = 1 * try_num)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
56 cat("Problem to receive PubMed data or error is received. Please wait. Try number: ", try_num, "\n")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
57 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
58
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
59 t_1 <- Sys.time()
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
60
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
61 if (as.numeric(difftime(t_1, t_0, units = "mins")) > 3) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
62 message("Killing the request! Something is not working. Please, try again later", "\n")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
63 return(data)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
64 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
65
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
66 efetch_result <- tryCatch({
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
67 suppressWarnings(efetch(uid = pmids, db = "pubmed", retmode = "xml"))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
68 }, error = function(e) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
69 NULL
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
70 })
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
71
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
72 if (!is.null(as.list(efetch_result$errors)$error)) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
73 if (as.list(efetch_result$errors)$error == "HTTP error: Status 400; Bad Request") {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
74 efetch_result <- NULL
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
75 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
76 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
77
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
78 try_num <- try_num + 1
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
79
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
80 } #while loop end
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
81
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
82 # articles to list
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
83 xml_data <- strsplit(efetch_result$content, "<PubmedArticle(>|[[:space:]]+?.*>)")[[1]][-1]
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
84 xml_data <- sapply(xml_data, function(x) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
85 #trim extra stuff at the end of the record
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
86 if (!grepl("</PubmedArticle>$", x))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
87 x <- sub("(^.*</PubmedArticle>).*$", "\\1", x)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
88 # Rebuid XML structure and proceed
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
89 x <- paste("<PubmedArticle>", x)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
90 gsub("[[:space:]]{2,}", " ", x)},
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
91 USE.NAMES = FALSE, simplify = TRUE)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
92
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
93 abstract_text <- sapply(xml_data, function(x) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
94 custom_grep(x, tag = "AbstractText", format = "char")},
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
95 USE.NAMES = FALSE, simplify = TRUE)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
96
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
97 abstracts <- sapply(abstract_text, function(x) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
98 if (length(x) > 1) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
99 x <- paste(x, collapse = " ", sep = " ")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
100 x <- gsub("</{0,1}i>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
101 x <- gsub("</{0,1}b>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
102 x <- gsub("</{0,1}sub>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
103 x <- gsub("</{0,1}exp>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
104 } else if (length(x) < 1) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
105 x <- NA
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
106 } else {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
107 x <- gsub("</{0,1}i>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
108 x <- gsub("</{0,1}b>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
109 x <- gsub("</{0,1}sub>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
110 x <- gsub("</{0,1}exp>", "", x, ignore.case = T)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
111 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
112 x
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
113 },
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
114 USE.NAMES = FALSE, simplify = TRUE)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
115
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
116 abstracts <- as.character(abstracts)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
117
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
118 if (length(abstracts) > 0) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
119 data[row, sapply(seq(length(abstracts)), function(i) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
120 paste0("ABSTRACT_", i)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
121 })] <- abstracts
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
122 cat(length(abstracts), " abstracts for PMIDs of row ", row, " are added in the table.", "\n")
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
123 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
124
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
125 return(data)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
126 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
127
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
128
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
129 for (row in seq(nrow(data))) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
130 pmids <- as.character(unique(data[row, pmids_cols_index]))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
131 pmids <- pmids[!pmids == "NA"]
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
132
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
133 if (length(pmids) > 0) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
134 data <- tryCatch(fetch_abstracts(pmids, row),
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
135 error = function(e) {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
136 Sys.sleep(3)
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
137 })
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
138 } else {
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
139 print(paste("No PMIDs in row", row))
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
140 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
141 }
02e46a96e98a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff changeset
142 write.table(data, args$output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)