Mercurial > repos > iuc > text_to_wordmatrix
annotate pubmed_by_queries.R @ 0:0692d11af909 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
| author | iuc | 
|---|---|
| date | Wed, 24 Mar 2021 08:33:25 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 1 #!/usr/bin/env Rscript | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 2 #tool: pubmed_by_queries | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 3 # | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 4 #This tool uses a set of search queries to download a defined number of abstracts or | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 5 #PMIDs for search query from PubMed. PubMed's search rules and syntax apply. | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 6 # | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 7 #Input: Tab-delimited table with search queries in a column starting with "ID_", | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 8 #e.g. "ID_gene" if search queries are genes. | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 9 # | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 10 #Output: Input table with additional columns | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 11 #with PMIDs or abstracts (--abstract) from PubMed. | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 12 # | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 13 #Usage: | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 14 #$pubmed_by_queries.R [-h] [-i INPUT] [-o OUTPUT] [-n NUMBER] [-a] [-k KEY] | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 15 # | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 16 #optional arguments: | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 17 # -h, --help show this help message and exit | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 18 # -i INPUT, --input INPUT input file name. add path if file is not in working directory | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 19 # -o OUTPUT, --output OUTPUT output file name. [default "pubmed_by_queries_output"] | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 20 # -n NUMBER, --number NUMBER number of PMIDs or abstracts to save per ID [default "5"] | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 21 # -a, --abstract if abstracts instead of PMIDs should be retrieved use --abstract | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 22 # -k KEY, --key KEY if ncbi API key is available, add it to speed up the download of PubMed data. | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 23 # For usage in Galaxy add the API key to the Galaxy user-preferences (User/ Preferences/ Manage Information). | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 24 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Optional bootstrap: when the script is invoked with --install_packages,
# install every required package that cannot be loaded.
# The check runs on the raw command line because argparse itself may be one of
# the packages that still has to be installed.
if ("--install_packages" %in% commandArgs()) {
  print("Installing packages")
  for (pkg in c("argparse", "easyPubMed")) {
    if (!require(pkg, character.only = TRUE)) {
      # `repos` is spelled out in full (the original relied on partial
      # matching of `repo`) and the mirror is reached over HTTPS.
      install.packages(pkg, repos = "https://cran.rstudio.com/")
    }
  }
}
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 30 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 31 suppressPackageStartupMessages(library("argparse")) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 32 suppressPackageStartupMessages(library("easyPubMed")) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 33 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Command line interface definition.
# Help texts name the flags exactly as they are defined below (the abstract
# switch is --abstract, not --abstracts).
parser <- ArgumentParser()
parser$add_argument("-i", "--input",
                    help = "Input file name. Add path if file is not in working directory.")
parser$add_argument("-o", "--output", default = "pubmed_by_queries_output",
                    help = "Output file name. [default \"%(default)s\"]")
parser$add_argument("-n", "--number", type = "integer", default = 5,
                    help = "Number of PMIDs (or abstracts) to save per ID. [default \"%(default)s\"]")
parser$add_argument("-a", "--abstract", action = "store_true", default = FALSE,
                    help = "If abstracts instead of PMIDs should be retrieved use --abstract")
parser$add_argument("-k", "--key", type = "character",
                    help = "If ncbi API key is available, add it to speed up the download of PubMed data. For usage in Galaxy add the API key to the Galaxy user-preferences (User/ Preferences/ Manage Information).")
parser$add_argument("--install_packages", action = "store_true", default = FALSE,
                    help = "If you want to auto install missing required packages.")
args <- parser$parse_args()
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 48 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Resolve the NCBI API key. --key may carry the key itself or the path of a
# file whose first field holds the key.
if (!is.null(args$key)) {
  if (file.exists(args$key)) {
    # Force character columns: a key consisting only of digits (or looking like
    # a number in exponent notation) must not be coerced to numeric.
    credentials <- read.table(args$key, quote = "\"", comment.char = "",
                              colClasses = "character", stringsAsFactors = FALSE)
    args$key <- credentials[1, 1]
  }
}

# Retry budget for web requests.
max_web_tries <- 100

# Fail early with a readable message instead of read.delim's generic error.
if (is.null(args$input)) {
  stop("No input file given. Use -i/--input to provide a tab-delimited table.")
}
data <- read.delim(args$input, stringsAsFactors = FALSE)

# Index(es) of the column(s) whose name contains "ID_" (the search queries).
id_col_index <- grep("ID_", names(data))
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 61 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 62 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Download up to `number` PMIDs for one search query and store them in `data`.
#
# Arguments:
#   data          data frame that receives the PMIDs.
#   number        maximum number of PMIDs to request (sent as retmax).
#   pubmed_search list describing the search; `$OriginalQuery` is used as the
#                 search term and `$APIkey`, when present, is sent as api_key.
#   query         query text, only used for the progress message.
#   row           row of `data` to fill.
#   max_web_tries maximum number of download attempts.
#
# Returns `data`; when PMIDs were found, columns PMID_1 .. PMID_k of `row`
# hold them. If nothing could be downloaded, `data` is returned unchanged.
fetch_pmids <- function(data, number, pubmed_search, query, row, max_web_tries) {
  my_pubmed_url <- paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?",
                         "db=pubmed&retmax=", number,
                         "&term=", pubmed_search$OriginalQuery,
                         "&usehistory=n", sep = "")
  # Send the API key when one is available, as fetch_abstracts does.
  api_key <- pubmed_search$APIkey
  if (!is.null(api_key)) {
    my_pubmed_url <- paste(my_pubmed_url, "&api_key=", api_key, sep = "")
  }

  # get ids
  idxml <- character(0)
  # seq_len() yields an empty sequence for 0 tries (seq(0) would give c(1, 0)).
  for (attempt in seq_len(max_web_tries)) {
    id_connect <- NULL
    fetched <- tryCatch({
      id_connect <- suppressWarnings(url(my_pubmed_url, open = "rb", encoding = "UTF8"))
      suppressWarnings(readLines(id_connect, warn = FALSE, encoding = "UTF8"))
    }, error = function(e) {
      print(paste("Error getting URL, sleeping", 2 * attempt, "seconds."))
      print(e)
      NULL
    }, finally = {
      # Close the connection on every path so a failed read does not leak it.
      if (!is.null(id_connect)) {
        try(suppressWarnings(close(id_connect)), silent = TRUE)
      }
    })
    if (!is.null(fetched)) {
      idxml <- fetched
      break
    }
    # Back off a little longer after each failed attempt.
    Sys.sleep(time = 2 * attempt)
  }

  # Keep only the lines that start with an <Id> element and take the first
  # value found on each of them. An empty download simply yields no PMIDs.
  id_lines <- idxml[grepl("^<Id>", idxml)]
  pmids <- unlist(lapply(id_lines, function(id_line) {
    pmid <- custom_grep(id_line, tag = "Id", format = "char")
    as.character(pmid[1])
  }))

  if (length(pmids) > 0) {
    data[row, paste0("PMID_", seq_along(pmids))] <- pmids
    cat(length(pmids), " PMIDs for ", query, " are added in the table.", "\n")
  }
  return(data)
}
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 97 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 98 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 99 fetch_abstracts <- function(data, number, query, pubmed_search) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 100 efetch_url <- paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?", | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 101 "db=pubmed&WebEnv=", pubmed_search$WebEnv, "&query_key=", pubmed_search$QueryKey, | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 102 "&retstart=", 0, "&retmax=", number, | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 103 "&rettype=", "null", "&retmode=", "xml", sep = "") | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 104 api_key <- pubmed_search$APIkey | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 105 if (!is.null(api_key)) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 106 efetch_url <- paste(efetch_url, "&api_key=", api_key, sep = "") | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 107 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 108 # initialize | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 109 out_data <- NULL | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 110 try_num <- 1 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 111 t_0 <- Sys.time() | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 112 # Try to fetch results | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 113 while (is.null(out_data)) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 114 # Timing check: kill at 3 min | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 115 if (try_num > 1) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 116 Sys.sleep(time = 2 * try_num) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 117 cat("Problem to receive PubMed data or error is received. Please wait. Try number:", | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 118 try_num, "\n") | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 119 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 120 t_1 <- Sys.time() | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 121 if (as.numeric(difftime(t_1, t_0, units = "mins")) > 3) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 122 message("Killing the request! Something is not working. Please, try again later", | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 123 "\n") | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 124 return(data) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 125 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 126 # ENTREZ server connect | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 127 out_data <- tryCatch({ | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 128 tmp_connect <- suppressWarnings(url(efetch_url, | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 129 open = "rb", | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 130 encoding = "UTF8")) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 131 suppressWarnings(readLines(tmp_connect, | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 132 warn = FALSE, | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 133 encoding = "UTF8")) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 134 }, error = function(e) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 135 print(e) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 136 }, finally = { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 137 try(suppressWarnings(close(tmp_connect)), | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 138 silent = TRUE) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 139 }) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 140 # Check if error | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 141 if (!is.null(out_data) && | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 142 class(out_data) == "character" && | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 143 grepl("<ERROR>", substr(paste(utils::head(out_data, n = 100), | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 144 collapse = ""), 1, 250))) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 145 out_data <- NULL | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 146 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 147 try_num <- try_num + 1 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 148 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 149 if (is.null(out_data)) { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 150 message("Killing the request! Something is not working. Please, try again later", | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 151 "\n") | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 152 return(data) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 153 } else { | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 154 return(out_data) | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 155 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 156 } | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 157 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 158 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Convert a raw PubMed XML download into one text string per article.
#
# Args:
#   out_data: character vector with the lines of the XML response.
#
# Returns:
#   Character vector with one element per <PubmedArticle> record, made of the
#   article title followed by the abstract text (all AbstractText sections
#   joined by a single space). A missing title or abstract shows up as the
#   literal text "NA" because the two parts are combined with paste().
#   Returns character(0) when no <PubmedArticle> record is found.
#
# Relies on custom_grep(), which is not defined in this block; it is assumed
# to return the contents of every occurrence of `tag` as a character vector
# (or nothing when the tag is absent) -- TODO confirm against its definition.
process_xml_abstracts <- function(out_data) {
  # Drop the inline formatting tags embedded in titles and abstracts.
  # "exp" is kept because the original code stripped it; "sup" is added since
  # superscript markup was otherwise left in the text (presumably the tag the
  # original "exp" was meant to cover).
  strip_inline_markup <- function(txt) {
    for (tag in c("i", "b", "sub", "sup", "exp")) {
      txt <- gsub(paste0("</{0,1}", tag, ">"), "", txt, ignore.case = TRUE)
    }
    txt
  }

  # Collapse all pieces found for one article into a single cleaned string;
  # NA marks an article without that element.
  flatten_text <- function(pieces) {
    if (length(pieces) < 1) {
      return(NA_character_)
    }
    paste(strip_inline_markup(pieces), collapse = " ")
  }

  xml_blob <- paste(out_data, collapse = "")

  # Split the document into article records; the first piece is whatever
  # precedes the first <PubmedArticle> tag and is discarded.
  records <- strsplit(xml_blob, "<PubmedArticle(>|[[:space:]]+?.*>)")[[1]][-1]
  records <- vapply(records, function(rec) {
    # Trim anything trailing the closing tag of the record.
    if (!grepl("</PubmedArticle>$", rec)) {
      rec <- sub("(^.*</PubmedArticle>).*$", "\\1", rec)
    }
    # Rebuild the XML structure and squeeze runs of whitespace.
    rec <- paste("<PubmedArticle>", rec)
    gsub("[[:space:]]{2,}", " ", rec)
  }, character(1), USE.NAMES = FALSE)

  # vapply() always yields exactly one string per record. The previous
  # sapply(simplify = TRUE) turned the abstract pieces into a matrix whenever
  # every article had the same number (> 1) of AbstractText sections, which
  # split the abstracts into separate entries and dropped the titles.
  titles <- vapply(records, function(rec) {
    flatten_text(custom_grep(rec, tag = "ArticleTitle", format = "char"))
  }, character(1), USE.NAMES = FALSE)

  abstracts <- vapply(records, function(rec) {
    flatten_text(custom_grep(rec, tag = "AbstractText", format = "char"))
  }, character(1), USE.NAMES = FALSE)

  # Prefix every abstract with its title.
  paste(titles, abstracts)
}
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 216 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 217 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Run one PubMed query and store its results in the given row of the table.
#
# Args:
#   data:     table (data frame) holding the queries and the result columns.
#   row:      index of the row that belongs to `query`.
#   query:    PubMed search string.
#   number:   number of results requested; passed on to the fetch helpers.
#   key:      NCBI API key handed to get_pubmed_ids() (may be NULL).
#   abstract: TRUE to store title + abstract text in the ABSTRACT_<n> columns,
#             FALSE to let fetch_pmids() fill in PMIDs.
#
# Returns:
#   The table, updated when results were obtained; otherwise unchanged. The
#   table is always returned, so a caller that assigns the result back never
#   loses it.
#
# Uses get_pubmed_ids(), fetch_pmids(), fetch_abstracts(),
# process_xml_abstracts() and the variable max_web_tries, all of which are
# defined outside this block.
pubmed_data_in_table <- function(data, row, query, number, key, abstract) {
  # Without a query there is nothing to search for. Previously the whole
  # table was printed and PubMed was still queried with NULL.
  if (is.null(query)) {
    message("No query given for row ", row, "; leaving the table unchanged.")
    return(data)
  }

  pubmed_search <- get_pubmed_ids(query, api_key = key)

  if (as.numeric(pubmed_search$Count) == 0) {
    cat("No PubMed result for the following query: ", query, "\n")
    return(data)
  }

  if (abstract == FALSE) {
    # fetch PMIDs
    return(fetch_pmids(data, number, pubmed_search, query, row, max_web_tries))
  }

  if (abstract == TRUE) {
    # fetch abstracts and title text
    out_data <- fetch_abstracts(data, number, query, pubmed_search)

    # Only a character vector can be an XML download; anything else means the
    # download did not succeed (presumably the helper then hands back the
    # table itself -- verify against fetch_abstracts), so skip parsing.
    if (!is.character(out_data)) {
      return(data)
    }

    abstracts <- process_xml_abstracts(out_data)

    # add abstracts to data frame
    if (length(abstracts) > 0) {
      target_cols <- paste0("ABSTRACT_", seq_along(abstracts))
      data[row, target_cols] <- abstracts
      cat(length(abstracts), " abstracts for ", query, " are added in the table.",
          "\n")
    }
  }

  # Also reached when `abstract` is neither TRUE nor FALSE; returning the
  # table instead of NULL keeps the caller's data intact.
  data
}
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
changeset | 244 | 
| 
0692d11af909
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
 iuc parents: diff
# Process the table row by row: each row's query (column id_col_index) is sent
# to PubMed and the results are written back into that row.
# seq_len() makes the loop a no-op for an empty table; seq(nrow(data)) would
# iterate over c(1, 0).
for (i in seq_len(nrow(data))) {
  updated <- tryCatch(
    pubmed_data_in_table(data = data,
                         row = i,
                         query = data[i, id_col_index],
                         number = args$number,
                         key = args$key,
                         abstract = args$abstract),
    error = function(e) {
      print("main error")
      print(e)
      # Pause before moving on to the next query.
      Sys.sleep(5)
      # Signal "no update" for this row. The handler's value used to be the
      # NULL returned by Sys.sleep(), which was assigned to `data` and threw
      # away every result collected so far.
      NULL
    })
  # Keep the previous table when the row failed or produced nothing.
  if (!is.null(updated)) {
    data <- updated
  }
}

# Write the completed table as a tab-separated file with a header line.
write.table(data, args$output, append = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
