annotate pubmed_by_queries.R @ 0:34ed44f3f85c draft

"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
author dlalgroup
date Thu, 24 Sep 2020 02:17:05 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
1 #!/usr/bin/env Rscript
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
2 #tool: pubmed_by_queries
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
3 #
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
4 #This tool uses a set of search queries to download a defined number of abstracts or PMIDs per search query from PubMed.
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
5 #PubMed's search rules and syntax apply.
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
6 #
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
7 #Input: Tab-delimited table with search queries in a column starting with "ID_", e.g. "ID_gene" if search queries are genes.
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
8 #
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
9 # Output: Input table with additional columns with PMIDs or abstracts (--abstract) from PubMed.
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
10 #
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
11 #Usage: $ pubmed_by_queries.R [-h] [-i INPUT] [-o OUTPUT] [-n NUMBER] [-a] [-k KEY]
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
12 #
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
13 # optional arguments:
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
14 # -h, --help show this help message and exit
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
15 # -i INPUT, --input INPUT input file name. add path if file is not in working directory
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
16 # -o OUTPUT, --output OUTPUT output file name. [default "pubmed_by_queries_output"]
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
17 # -n NUMBER, --number NUMBER number of PMIDs or abstracts to save per ID [default "5"]
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
18 # -a, --abstract if abstracts instead of PMIDs should be retrieved, use --abstract
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
19 # -k KEY, --key KEY if NCBI API key is available, add it to speed up the fetching of pubmed data
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
20
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
21 if ( '--install_packages' %in% commandArgs()) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
22 print('Installing packages')
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
23 if (!require('argparse')) install.packages('argparse',repo="http://cran.rstudio.com/");
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
24 if (!require('easyPubMed')) install.packages('easyPubMed',repo="http://cran.rstudio.com/");
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
25 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
26
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
27 suppressPackageStartupMessages(library("argparse"))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
28 suppressPackageStartupMessages(library("easyPubMed"))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
29
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
30 parser <- ArgumentParser()
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
31 parser$add_argument("-i", "--input",
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
32 help = "input fie name. add path if file is not in working directory")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
33 parser$add_argument("-o", "--output", default="pubmed_by_queries_output",
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
34 help = "output file name. [default \"%(default)s\"]")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
35 parser$add_argument("-n", "--number", type="integer", default=5,
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
36 help="Number of PMIDs (or abstracts) to save per ID. [default \"%(default)s\"]")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
37 parser$add_argument("-a", "--abstract", action="store_true", default=FALSE,
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
38 help="if abstracts instead of PMIDs should be retrieved use --abstracts ")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
39 parser$add_argument("-k", "--key", type="character",
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
40 help="if ncbi API key is available, add it to speed up the download of pubmed data")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
41 parser$add_argument("--install_packages", action="store_true", default=FALSE,
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
42 help="If you want to auto install missing required packages.")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
43 args <- parser$parse_args()
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
44
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
45 MAX_WEB_TRIES = 100
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
46
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
47 data = read.delim(args$input, stringsAsFactors=FALSE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
48
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
49 id_col_index <- grep("ID_", names(data))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
50
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
51 pubmed_data_in_table <- function(data, row, query, number, key, abstract){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
52 if (is.null(query)){print(data)}
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
53 pubmed_search <- get_pubmed_ids(query, api_key = key)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
54
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
55 if(as.numeric(pubmed_search$Count) == 0){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
56 cat("No PubMed result for the following query: ", query, "\n")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
57 return(data)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
58
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
59 } else if (abstract == FALSE) { # fetch PMIDs
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
60
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
61 myPubmedURL <- paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?",
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
62 "db=pubmed&retmax=", number, "&term=", pubmed_search$OriginalQuery, "&usehistory=n", sep = "")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
63 # get ids
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
64 idXML <- c()
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
65 for (i in 1:MAX_WEB_TRIES){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
66 tryCatch({
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
67 IDconnect <- suppressWarnings(url(myPubmedURL, open = "rb", encoding = "UTF8"))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
68 idXML <- suppressWarnings(readLines(IDconnect, warn = FALSE, encoding = "UTF8"))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
69 suppressWarnings(close(IDconnect))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
70 break
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
71 }, error = function(e) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
72 print(paste('Error getting URL, sleeping',2*i,'seconds.'))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
73 print(e)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
74 Sys.sleep(time = 2*i)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
75 })
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
76 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
77
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
78 PMIDs = c()
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
79
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
80 for (i in 1:length(idXML)) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
81 if (grepl("^<Id>", idXML[i])) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
82 pmid <- custom_grep(idXML[i], tag = "Id", format = "char")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
83 PMIDs <- c(PMIDs, as.character(pmid[1]))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
84 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
85 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
86
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
87
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
88 if(length(PMIDs)>0){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
89 data[row,sapply(1:length(PMIDs),function(i){paste0("PMID_",i)})] <- PMIDs
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
90 cat(length(PMIDs)," PMIDs for ",query, " are added in the table.", "\n")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
91 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
92
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
93 return(data)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
94
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
95 } else if (abstract == TRUE) { # fetch abstracts and title text
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
96
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
97 efetch_url = paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?",
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
98 "db=pubmed&WebEnv=", pubmed_search$WebEnv, "&query_key=", pubmed_search$QueryKey,
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
99 "&retstart=", 0, "&retmax=", number,
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
100 "&rettype=", "null","&retmode=", "xml", sep = "")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
101
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
102 api_key <- pubmed_search$APIkey
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
103 if (!is.null(api_key)) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
104 efetch_url <- paste(efetch_url, "&api_key=", api_key, sep = "")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
105 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
106
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
107 # initialize
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
108 out.data <- NULL
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
109 try_num <- 1
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
110 t_0 <- Sys.time()
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
111
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
112 # Try to fetch results
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
113 while(is.null(out.data)) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
114
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
115 # Timing check: kill at 3 min
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
116 if (try_num > 1){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
117 Sys.sleep(time = 2*try_num)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
118 cat("Problem to receive PubMed data or error is received. Please wait. Try number:",try_num,"\n")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
119 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
120
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
121 t_1 <- Sys.time()
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
122
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
123 if(as.numeric(difftime(t_1, t_0, units = "mins")) > 3){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
124 message("Killing the request! Something is not working. Please, try again later","\n")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
125 return(data)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
126 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
127
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
128 # ENTREZ server connect
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
129 out.data <- tryCatch({
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
130 tmpConnect <- suppressWarnings(url(efetch_url, open = "rb", encoding = "UTF8"))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
131 suppressWarnings(readLines(tmpConnect, warn = FALSE, encoding = "UTF8"))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
132 }, error = function(e) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
133 print(e)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
134 }, finally = {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
135 try(suppressWarnings(close(tmpConnect)), silent = TRUE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
136 })
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
137
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
138 # Check if error
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
139 if (!is.null(out.data) &&
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
140 class(out.data) == "character" &&
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
141 grepl("<ERROR>", substr(paste(utils::head(out.data, n = 100), collapse = ""), 1, 250))) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
142 out.data <- NULL
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
143 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
144 try_num <- try_num + 1
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
145 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
146
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
147 if (is.null(out.data)) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
148 message("Killing the request! Something is not working. Please, try again later","\n")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
149 return(data)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
150 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
151
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
152 # process xml data
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
153 xml_data <- paste(out.data, collapse = "")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
154
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
155 # articles to list
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
156 xml_data <- strsplit(xml_data, "<PubmedArticle(>|[[:space:]]+?.*>)")[[1]][-1]
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
157 xml_data <- sapply(xml_data, function(x) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
158 #trim extra stuff at the end of the record
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
159 if (!grepl("</PubmedArticle>$", x))
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
160 x <- sub("(^.*</PubmedArticle>).*$", "\\1", x)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
161 # Rebuild XML structure and proceed
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
162 x <- paste("<PubmedArticle>", x)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
163 gsub("[[:space:]]{2,}", " ", x)},
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
164 USE.NAMES = FALSE, simplify = TRUE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
165
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
166 #titles
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
167 titles = sapply(xml_data, function(x){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
168 x = custom_grep(x, tag="ArticleTitle", format="char")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
169 x <- gsub("</{0,1}i>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
170 x <- gsub("</{0,1}b>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
171 x <- gsub("</{0,1}sub>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
172 x <- gsub("</{0,1}exp>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
173 if (length(x) > 1){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
174 x <- paste(x, collapse = " ", sep = " ")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
175 } else if (length(x) < 1) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
176 x <- NA
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
177 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
178 x
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
179 },
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
180 USE.NAMES = FALSE, simplify = TRUE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
181
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
182 # abstracts
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
183 abstract.text = sapply(xml_data, function(x){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
184 custom_grep(x, tag="AbstractText", format="char")},
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
185 USE.NAMES = FALSE, simplify = TRUE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
186
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
187 abstracts <- sapply(abstract.text, function(x){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
188 if (length(x) > 1){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
189 x <- paste(x, collapse = " ", sep = " ")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
190 x <- gsub("</{0,1}i>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
191 x <- gsub("</{0,1}b>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
192 x <- gsub("</{0,1}sub>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
193 x <- gsub("</{0,1}exp>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
194 } else if (length(x) < 1) {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
195 x <- NA
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
196 } else {
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
197 x <- gsub("</{0,1}i>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
198 x <- gsub("</{0,1}b>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
199 x <- gsub("</{0,1}sub>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
200 x <- gsub("</{0,1}exp>", "", x, ignore.case = T)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
201 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
202 x
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
203 },
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
204 USE.NAMES = FALSE, simplify = TRUE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
205
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
206 #add title to abstracts
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
207 if (length(titles) == length(abstracts)){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
208 abstracts = paste(titles, abstracts)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
209 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
210
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
211 #add abstracts to data frame
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
212 if(length(abstracts)>0){
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
213 data[row,sapply(1:length(abstracts),function(i){paste0("ABSTRACT_",i)})] <- abstracts
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
214 cat(length(abstracts)," abstracts for ",query, " are added in the table.", "\n")
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
215 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
216
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
217 return(data)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
218 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
219 }
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
220
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
# Main driver: run one PubMed request per input row and accumulate the
# results in `data`. pubmed_data_in_table() returns the (possibly extended)
# table, so its result is assigned back to `data` on every iteration.
# seq_len() is used instead of 1:nrow(data) so that an empty input table
# results in zero iterations rather than iterating over c(1, 0).
for (i in seq_len(nrow(data))) {
  data <- tryCatch(
    pubmed_data_in_table(data = data,
                         row = i,
                         query = data[i, id_col_index],
                         number = args$number,
                         key = args$key,
                         abstract = args$abstract),
    error = function(e) {
      print('main error')
      print(e)
      # Pause before moving on to the next query.
      Sys.sleep(5)
      # The handler's value becomes the value of tryCatch(). Return the table
      # unchanged so that one failed query does not overwrite `data` with the
      # NULL returned by Sys.sleep() and discard results gathered so far.
      data
    })
}

# Write the final table as a tab-separated file with a header row and no
# row names.
write.table(data, args$output, append = FALSE, sep = '\t', row.names = FALSE, col.names = TRUE)
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
235
34ed44f3f85c "planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
dlalgroup
parents:
diff changeset
236