Mercurial > repos > iuc > text_to_wordmatrix
annotate pubmed_by_queries.R @ 0:0692d11af909 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
author | iuc |
---|---|
date | Wed, 24 Mar 2021 08:33:25 +0000 |
parents | |
children |
rev | line source |
---|---|
0
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env Rscript |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
2 #tool: pubmed_by_queries |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
3 # |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
4 #This tool uses a set of search queries to download a defined number of abstracts or |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
5 #PMIDs for each search query from PubMed. PubMed's search rules and syntax apply. |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
6 # |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
7 #Input: Tab-delimited table with search queries in a column starting with "ID_", |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
8 #e.g. "ID_gene" if search queries are genes. |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
9 # |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
10 #Output: Input table with additional columns |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
11 #with PMIDs or abstracts (--abstracts) from PubMed. |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
12 # |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
13 #Usage: |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
14 #$pubmed_by_queries.R [-h] [-i INPUT] [-o OUTPUT] [-n NUMBER] [-a] [-k KEY] |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
15 # |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
16 #optional arguments: |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
17 # -h, --help show this help message and exit |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
18 # -i INPUT, --input INPUT input file name. add path if file is not in working directory |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
19 # -o OUTPUT, --output OUTPUT output file name. [default "pubmed_by_queries_output"] |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
20 # -n NUMBER, --number NUMBER number of PMIDs or abstracts to save per ID [default "5"] |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
21 # -a, --abstract if abstracts instead of PMIDs should be retrieved use --abstract |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
22 # -k KEY, --key KEY if ncbi API key is available, add it to speed up the download of PubMed data. |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
23 # For usage in Galaxy add the API key to the Galaxy user-preferences (User/ Preferences/ Manage Information). |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
24 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Optional dependency bootstrap ----
# When the script is invoked with --install_packages, install any required
# package that is not yet available. The flag is looked up in the raw command
# line because argparse itself may be one of the missing packages.
if ("--install_packages" %in% commandArgs()) {
  print("Installing packages")
  for (pkg in c("argparse", "easyPubMed")) {
    # requireNamespace() only tests availability; the packages are attached
    # by the library() calls that follow this block.
    if (!requireNamespace(pkg, quietly = TRUE)) {
      # Spell out `repos` (the original relied on partial matching of `repo`)
      # and fetch over https.
      install.packages(pkg, repos = "https://cran.rstudio.com/")
    }
  }
}
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
30 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
31 suppressPackageStartupMessages(library("argparse")) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
32 suppressPackageStartupMessages(library("easyPubMed")) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
33 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Command-line interface ----
# Declares the options accepted by the script and parses them into `args`.
#   -i/--input             tab-delimited table holding the search queries
#   -o/--output            output file name
#   -n/--number            how many PMIDs/abstracts to keep per query
#   -a/--abstract          retrieve abstracts instead of PMIDs
#   -k/--key               NCBI API key (or a file containing it)
#   --install_packages     install missing packages before running
parser <- ArgumentParser()
parser$add_argument("-i", "--input",
                    help = "Input file name. add path if file is not in working directory")
parser$add_argument("-o", "--output", default = "pubmed_by_queries_output",
                    help = "Output file name. [default \"%(default)s\"]")
parser$add_argument("-n", "--number", type = "integer", default = 5,
                    help = "Number of PMIDs (or abstracts) to save per ID. [default \"%(default)s\"]")
# The help text must name the flag exactly as registered (--abstract).
parser$add_argument("-a", "--abstract", action = "store_true", default = FALSE,
                    help = "If abstracts instead of PMIDs should be retrieved use --abstract")
parser$add_argument("-k", "--key", type = "character",
                    help = "If ncbi API key is available, add it to speed up the download of PubMed data. For usage in Galaxy add the API key to the Galaxy user-preferences (User/ Preferences/ Manage Information).")
parser$add_argument("--install_packages", action = "store_true", default = FALSE,
                    help = "If you want to auto install missing required packages.")
args <- parser$parse_args()
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
48 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Resolve the NCBI API key ----
# --key may hold the key itself or the path of a file containing it. When it
# names an existing file, the first field of the first line is used as the key.
if (!is.null(args$key)) {
  if (file.exists(args$key)) {
    # Force character columns so a key made only of digits is not converted to
    # a number (or to a factor on older R versions) before it is pasted into
    # request URLs.
    credentials <- read.table(args$key, quote = "\"", comment.char = "",
                              colClasses = "character",
                              stringsAsFactors = FALSE)
    args$key <- credentials[1, 1]
  }
}

# Upper bound on the number of attempts for a single web request.
max_web_tries <- 100

# Load the query table ----
# Fail early with an explicit message instead of the obscure error read.delim()
# raises when it is handed NULL.
if (is.null(args$input)) {
  stop("No input file given. Use -i/--input to provide the query table.",
       call. = FALSE)
}
data <- read.delim(args$input, stringsAsFactors = FALSE)

# Position(s) of the column(s) whose name contains "ID_"; these hold the
# search queries.
id_col_index <- grep("ID_", names(data))
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
61 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
62 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Fetch PMIDs for one query via NCBI ESearch and store them in `data`.
#
# Arguments:
#   data           data frame that receives the results.
#   number         maximum number of PMIDs to request (ESearch `retmax`).
#   pubmed_search  list-like object providing `$OriginalQuery` (used verbatim
#                  as the ESearch `term`) and, optionally, `$APIkey`.
#   query          label of the query, only used in the progress message.
#   row            row of `data` to fill.
#   max_web_tries  maximum number of download attempts.
#
# Returns:
#   `data`, with the PMIDs written to columns PMID_1 ... PMID_k of `row`.
#   `data` is returned unchanged when no PMID could be retrieved.
fetch_pmids <- function(data, number, pubmed_search, query, row, max_web_tries) {
  # format() keeps large counts from being rendered in scientific notation.
  my_pubmed_url <- paste0("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?",
                          "db=pubmed&retmax=",
                          format(number, scientific = FALSE, trim = TRUE),
                          "&term=", pubmed_search$OriginalQuery,
                          "&usehistory=n")
  # Send the API key when one is available, as the efetch request built in
  # fetch_abstracts already does.
  api_key <- pubmed_search$APIkey
  if (!is.null(api_key)) {
    my_pubmed_url <- paste0(my_pubmed_url, "&api_key=", api_key)
  }

  # Download the response lines; the connection is closed even if reading
  # fails part-way.
  read_url_lines <- function(target_url) {
    con <- suppressWarnings(url(target_url, open = "rb", encoding = "UTF8"))
    on.exit(try(suppressWarnings(close(con)), silent = TRUE), add = TRUE)
    suppressWarnings(readLines(con, warn = FALSE, encoding = "UTF8"))
  }

  # get ids: retry with a linearly growing pause between attempts
  idxml <- NULL
  for (attempt in seq_len(max_web_tries)) {
    idxml <- tryCatch(
      read_url_lines(my_pubmed_url),
      error = function(e) {
        print(paste("Error getting URL, sleeping", 2 * attempt, "seconds."))
        print(e)
        Sys.sleep(time = 2 * attempt)
        NULL
      }
    )
    if (!is.null(idxml)) {
      break
    }
  }
  if (is.null(idxml)) {
    # Every attempt failed: report it and fall through with an empty response
    # so the table is returned unchanged instead of crashing.
    warning("No response from PubMed for ", query, " after ", max_web_tries,
            " attempt(s).", call. = FALSE)
    idxml <- character(0)
  }

  # Each PMID sits on its own line starting with <Id>.
  id_lines <- grep("^<Id>", idxml, value = TRUE)
  pmids <- unlist(lapply(id_lines, function(id_line) {
    as.character(custom_grep(id_line, tag = "Id", format = "char")[1])
  }), use.names = FALSE)

  if (length(pmids) > 0) {
    data[row, paste0("PMID_", seq_along(pmids))] <- pmids
    cat(length(pmids), " PMIDs for ", query, " are added in the table.", "\n")
  }
  return(data)
}
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
97 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
98 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
99 fetch_abstracts <- function(data, number, query, pubmed_search) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
100 efetch_url <- paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?", |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
101 "db=pubmed&WebEnv=", pubmed_search$WebEnv, "&query_key=", pubmed_search$QueryKey, |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
102 "&retstart=", 0, "&retmax=", number, |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
103 "&rettype=", "null", "&retmode=", "xml", sep = "") |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
104 api_key <- pubmed_search$APIkey |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
105 if (!is.null(api_key)) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
106 efetch_url <- paste(efetch_url, "&api_key=", api_key, sep = "") |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
107 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
108 # initialize |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
109 out_data <- NULL |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
110 try_num <- 1 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
111 t_0 <- Sys.time() |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
112 # Try to fetch results |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
113 while (is.null(out_data)) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
114 # Timing check: kill at 3 min |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
115 if (try_num > 1) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
116 Sys.sleep(time = 2 * try_num) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
117 cat("Problem to receive PubMed data or error is received. Please wait. Try number:", |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
118 try_num, "\n") |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
119 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
120 t_1 <- Sys.time() |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
121 if (as.numeric(difftime(t_1, t_0, units = "mins")) > 3) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
122 message("Killing the request! Something is not working. Please, try again later", |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
123 "\n") |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
124 return(data) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
125 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
126 # ENTREZ server connect |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
127 out_data <- tryCatch({ |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
128 tmp_connect <- suppressWarnings(url(efetch_url, |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
129 open = "rb", |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
130 encoding = "UTF8")) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
131 suppressWarnings(readLines(tmp_connect, |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
132 warn = FALSE, |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
133 encoding = "UTF8")) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
134 }, error = function(e) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
135 print(e) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
136 }, finally = { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
137 try(suppressWarnings(close(tmp_connect)), |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
138 silent = TRUE) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
139 }) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
140 # Check if error |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
141 if (!is.null(out_data) && |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
142 class(out_data) == "character" && |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
143 grepl("<ERROR>", substr(paste(utils::head(out_data, n = 100), |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
144 collapse = ""), 1, 250))) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
145 out_data <- NULL |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
146 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
147 try_num <- try_num + 1 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
148 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
149 if (is.null(out_data)) { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
150 message("Killing the request! Something is not working. Please, try again later", |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
151 "\n") |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
152 return(data) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
153 } else { |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
154 return(out_data) |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
155 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
156 } |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
157 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
158 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Convert raw PubMed XML into one "title abstract" string per article.
#
# Args:
#   out_data: character vector with the fetched XML; its elements are
#     concatenated before parsing.
#
# Returns:
#   Character vector with one element per <PubmedArticle> record, each being
#   the article title and the abstract text joined by a single space.
#   A missing title or abstract shows up as the literal text "NA" in that
#   string (this mirrors the previous behaviour of paste() on NA).
#   Returns character(0) when no article record is found.
#
# Relies on custom_grep(), defined elsewhere in this file; it is presumably
# returning a character vector with the contents of every occurrence of
# `tag` -- TODO confirm.
process_xml_abstracts <- function(out_data) {
  # Remove the inline formatting tags (<i>, <b>, <sub>, <exp>) that may be
  # embedded in titles and abstracts.
  strip_inline_tags <- function(txt) {
    for (tag in c("i", "b", "sub", "exp")) {
      txt <- gsub(paste0("</{0,1}", tag, ">"), "", txt, ignore.case = TRUE)
    }
    txt
  }

  # Extract all occurrences of `tag` from a single record and fold them into
  # exactly one string (NA when the tag is absent).
  extract_text <- function(record, tag) {
    hits <- strip_inline_tags(custom_grep(record, tag = tag, format = "char"))
    if (length(hits) < 1) {
      return(NA_character_)
    }
    paste(hits, collapse = " ")
  }

  xml_text <- paste(out_data, collapse = "")

  # Split into articles; the first piece is whatever precedes the first
  # <PubmedArticle> tag and is discarded.
  records <- strsplit(xml_text, "<PubmedArticle(>|[[:space:]]+?.*>)")[[1]][-1]
  records <- vapply(records, function(rec) {
    # Trim anything that trails the closing tag of the record.
    if (!grepl("</PubmedArticle>$", rec)) {
      rec <- sub("(^.*</PubmedArticle>).*$", "\\1", rec)
    }
    # Rebuild the XML structure and squeeze runs of whitespace.
    gsub("[[:space:]]{2,}", " ", paste("<PubmedArticle>", rec))
  }, character(1), USE.NAMES = FALSE)

  # vapply() guarantees exactly one value per article. The former
  # sapply(simplify = TRUE) turned equally sized multi-section abstracts into
  # a matrix, which split abstracts into fragments and dropped the titles.
  titles <- vapply(records, extract_text, character(1),
                   tag = "ArticleTitle", USE.NAMES = FALSE)
  abstracts <- vapply(records, extract_text, character(1),
                      tag = "AbstractText", USE.NAMES = FALSE)

  # Prefix every abstract with its title.
  paste(titles, abstracts)
}
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
216 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
217 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Run one PubMed query and store its results in the given row of `data`.
#
# Args:
#   data:     data frame holding the queries and the result columns.
#   row:      index of the row that belongs to `query`.
#   query:    search string passed to get_pubmed_ids().
#   number:   passed through to fetch_pmids() / fetch_abstracts().
#   key:      API key forwarded to get_pubmed_ids() as `api_key`.
#   abstract: TRUE to store title + abstract text in the ABSTRACT_<i>
#             columns; anything else stores PMIDs via fetch_pmids().
#
# Returns:
#   The (possibly updated) data frame. `data` is returned unchanged when the
#   query is empty or the search reports no hits.
#
# Uses get_pubmed_ids(), fetch_pmids(), fetch_abstracts() and the variable
# max_web_tries, none of which are defined inside this function.
pubmed_data_in_table <- function(data, row, query, number, key, abstract) {
  # Nothing to search for: leave the table untouched instead of sending an
  # empty request. Status goes to stdout, like the other notices here.
  if (is.null(query) || length(query) != 1 || is.na(query) ||
      !nzchar(trimws(as.character(query)))) {
    cat("Skipping row ", row, ": empty search query.", "\n")
    return(data)
  }

  pubmed_search <- get_pubmed_ids(query, api_key = key)

  # A missing or unparsable count is handled like "no hits" so that the `if`
  # below never sees a zero-length or NA condition.
  hit_count <- as.numeric(pubmed_search$Count)
  if (length(hit_count) != 1 || is.na(hit_count) || hit_count == 0) {
    cat("No PubMed result for the following query: ", query, "\n")
    return(data)
  }

  if (!isTRUE(as.logical(abstract))) {
    # fetch PMIDs
    return(fetch_pmids(data, number, pubmed_search, query, row, max_web_tries))
  }

  # fetch abstracts and title text
  out_data <- fetch_abstracts(data, number, query, pubmed_search)
  abstracts <- process_xml_abstracts(out_data)

  # add abstracts to data frame
  if (length(abstracts) > 0) {
    data[row, paste0("ABSTRACT_", seq_along(abstracts))] <- abstracts
    cat(length(abstracts), " abstracts for ", query, " are added in the table.",
        "\n")
  }
  data
}
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
244 |
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
iuc
parents:
diff
changeset
|
# Query PubMed once per input row and collect the results in `data`.
# seq_len() keeps the loop from running when the table has no rows.
for (i in seq_len(nrow(data))) {
  data <- tryCatch(
    pubmed_data_in_table(data = data,
                         row = i,
                         query = data[i, id_col_index],
                         number = args$number,
                         key = args$key,
                         abstract = args$abstract),
    error = function(e) {
      print("main error")
      print(e)
      Sys.sleep(5)
      # Hand back the table as it was before this row. Without this the
      # handler's value (NULL from Sys.sleep) would replace `data` and
      # discard every result gathered so far.
      data
    })
}

# Write the completed table as tab-separated text.
write.table(data, args$output, append = FALSE, sep = "\t",
            row.names = FALSE, col.names = TRUE, quote = FALSE)