Mercurial > repos > iuc > pubmed_by_queries
annotate abstracts_by_pmids.R @ 0:02e46a96e98a draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
author | iuc |
---|---|
date | Wed, 24 Mar 2021 08:34:22 +0000 |
parents | |
children |
rev | line source |
---|---|
0
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env Rscript |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
2 #TOOL2 abstracts_by_pmids |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
3 # |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
4 #This tool retrieves for all PMIDs in each row of a table the according abstracts and saves them in additional columns. |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
5 # |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
6 #Input: Tab-delimited table with columns containing PMIDs. The names of the PMID columns should start with “PMID”, e.g. “PMID_1”, “PMID_2” etc. |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
7 # |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
8 #Output: Input table with additional columns containing abstracts corresponding to the PMIDs from PubMed. |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
9 #The abstract columns are called "ABSTRACT_1", "ABSTARCT_2" etc. |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
10 # |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
11 # Usage: $ T2_abstracts_by_pmid.R [-h] [-i INPUT] [-o OUTPUT] |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
12 # |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
13 # optional arguments: |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
14 # -h, --help show help message |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
15 # -i INPUT, --input INPUT input file name. add path if file is not in working directory |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
16 # -o OUTPUT, --output OUTPUT output file name. [default "T2_output"] |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
17 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
18 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
19 if ("--install_packages" %in% commandArgs()) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
20 print("Installing packages") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
21 if (!require("argparse")) install.packages("argparse", repo = "http://cran.rstudio.com/"); |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
22 if (!require("reutils")) install.packages("reutils", repo = "http://cran.rstudio.com/"); |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
23 if (!require("easyPubMed")) install.packages("easyPubMed", repo = "http://cran.rstudio.com/"); |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
24 if (!require("textclean")) install.packages("textclean", repo = "http://cran.rstudio.com/"); |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
25 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
26 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
27 suppressPackageStartupMessages(library("argparse")) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
28 library("reutils") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
29 suppressPackageStartupMessages(library("easyPubMed")) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
30 suppressPackageStartupMessages(library("textclean")) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
31 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
32 parser <- ArgumentParser() |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
33 parser$add_argument("-i", "--input", |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
34 help = "input fie name. add path if file is not in workind directory") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
35 parser$add_argument("-o", "--output", default = "abstracts_by_pmids_output", |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
36 help = "output file name. [default \"%(default)s\"]") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
37 parser$add_argument("--install_packages", action = "store_true", default = FALSE, |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
38 help = "If you want to auto install missing required packages.") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
39 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
40 args <- parser$parse_args() |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
41 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
42 data <- read.delim(args$input, stringsAsFactors = FALSE, header = TRUE, sep = "\t") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
43 pmids_cols_index <- grep("PMID", names(data)) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
44 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
45 fetch_abstracts <- function(pmids, row) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
46 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
47 efetch_result <- NULL |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
48 try_num <- 1 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
49 t_0 <- Sys.time() |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
50 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
51 while (is.null(efetch_result)) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
52 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
53 # Timing check: kill at 3 min |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
54 if (try_num > 1) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
55 Sys.sleep(time = 1 * try_num) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
56 cat("Problem to receive PubMed data or error is received. Please wait. Try number: ", try_num, "\n") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
57 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
58 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
59 t_1 <- Sys.time() |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
60 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
61 if (as.numeric(difftime(t_1, t_0, units = "mins")) > 3) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
62 message("Killing the request! Something is not working. Please, try again later", "\n") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
63 return(data) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
64 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
65 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
66 efetch_result <- tryCatch({ |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
67 suppressWarnings(efetch(uid = pmids, db = "pubmed", retmode = "xml")) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
68 }, error = function(e) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
69 NULL |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
70 }) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
71 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
72 if (!is.null(as.list(efetch_result$errors)$error)) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
73 if (as.list(efetch_result$errors)$error == "HTTP error: Status 400; Bad Request") { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
74 efetch_result <- NULL |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
75 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
76 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
77 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
78 try_num <- try_num + 1 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
79 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
80 } #while loop end |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
81 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
82 # articles to list |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
83 xml_data <- strsplit(efetch_result$content, "<PubmedArticle(>|[[:space:]]+?.*>)")[[1]][-1] |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
84 xml_data <- sapply(xml_data, function(x) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
85 #trim extra stuff at the end of the record |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
86 if (!grepl("</PubmedArticle>$", x)) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
87 x <- sub("(^.*</PubmedArticle>).*$", "\\1", x) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
88 # Rebuid XML structure and proceed |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
89 x <- paste("<PubmedArticle>", x) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
90 gsub("[[:space:]]{2,}", " ", x)}, |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
91 USE.NAMES = FALSE, simplify = TRUE) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
92 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
93 abstract_text <- sapply(xml_data, function(x) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
94 custom_grep(x, tag = "AbstractText", format = "char")}, |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
95 USE.NAMES = FALSE, simplify = TRUE) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
96 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
97 abstracts <- sapply(abstract_text, function(x) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
98 if (length(x) > 1) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
99 x <- paste(x, collapse = " ", sep = " ") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
100 x <- gsub("</{0,1}i>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
101 x <- gsub("</{0,1}b>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
102 x <- gsub("</{0,1}sub>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
103 x <- gsub("</{0,1}exp>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
104 } else if (length(x) < 1) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
105 x <- NA |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
106 } else { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
107 x <- gsub("</{0,1}i>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
108 x <- gsub("</{0,1}b>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
109 x <- gsub("</{0,1}sub>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
110 x <- gsub("</{0,1}exp>", "", x, ignore.case = T) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
111 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
112 x |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
113 }, |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
114 USE.NAMES = FALSE, simplify = TRUE) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
115 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
116 abstracts <- as.character(abstracts) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
117 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
118 if (length(abstracts) > 0) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
119 data[row, sapply(seq(length(abstracts)), function(i) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
120 paste0("ABSTRACT_", i) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
121 })] <- abstracts |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
122 cat(length(abstracts), " abstracts for PMIDs of row ", row, " are added in the table.", "\n") |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
123 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
124 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
125 return(data) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
126 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
127 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
128 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
129 for (row in seq(nrow(data))) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
130 pmids <- as.character(unique(data[row, pmids_cols_index])) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
131 pmids <- pmids[!pmids == "NA"] |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
132 |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
133 if (length(pmids) > 0) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
134 data <- tryCatch(fetch_abstracts(pmids, row), |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
135 error = function(e) { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
136 Sys.sleep(3) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
137 }) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
138 } else { |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
139 print(paste("No PMIDs in row", row)) |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
140 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
141 } |
02e46a96e98a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
142 write.table(data, args$output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE) |