Mercurial > repos > iuc > pmids_to_pubtator_matrix
diff pmids_to_pubtator_matrix.xml @ 0:69714f06f18b draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/simtext commit 63a5e13cf89cdd209d20749c582ec5b8dde4e208"
author | iuc |
---|---|
date | Wed, 24 Mar 2021 08:33:56 +0000 |
parents | |
children |
line wrap: on
line diff
<tool id="pmids_to_pubtator_matrix" name="PMIDs to PubTator" version="@VERSION@" license="MIT">
    <!--
        Galaxy wrapper around pmids_to_pubtator_matrix.R.
        Input: a tabular dataset with PMID columns. Output: one tabular dataset
        holding the binary term matrix described in the help section below.
        The VERSION token and the "citations" macro come from macros.xml.
    -->
    <description>binary matrix</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="2.0.3">r-argparse</requirement>
        <requirement type="package" version="1.4.0">r-stringr</requirement>
        <requirement type="package" version="1.98_1.2">r-rcurl</requirement>
        <requirement type="package" version="1.5.3">r-stringi</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        Rscript
            '${__tool_directory__}/pmids_to_pubtator_matrix.R'
            --input '$input'
            --output '$output'
            ## "number" is an optional integer: when the field is cleared it renders as
            ## an empty string, so the flag is only passed when a value is present.
            ## NOTE(review): with the flag omitted the R script's own default applies - confirm it has one.
            #if str($number) != '':
                --number '$number'
            #end if
            ## Boolean param: expands to "--byid" when checked, to nothing otherwise.
            $byid
            ## A multiple select may be submitted with nothing ticked; emitting a bare
            ## "--categories" without values would hand the script a flag with no arguments.
            ## NOTE(review): confirm how the R script behaves when --categories is omitted.
            #if $categories:
                --categories
                #for $category in $categories:
                    '$category'
                #end for
            #end if
    ]]>
    </command>
    <inputs>
        <param argument="--input" type="data" format="tabular" label="Input file with PMID IDs" />
        <param argument="--categories" type="select" label="categories" multiple="true" display="checkboxes">
            <option value="Gene">Genes</option>
            <option value="Disease">Diseases</option>
            <option value="Mutation">Mutations</option>
            <option value="Chemical">Chemicals</option>
            <option value="Species">Species</option>
        </param>
        <!-- The parameter names "byid" and "number" are derived from the argument attribute. -->
        <param argument="--byid" type="boolean" truevalue="--byid" falsevalue="" checked="false"
               label="If you want to find common gene IDs / mesh IDs instead of specific scientific terms."
               help="Terms are grouped by their gene ID / MeSH ID; several terms are often grouped into one ID." />
        <param argument="--number" type="integer" optional="true" value="50"
               label="Number of most frequent terms/IDs to extract."
               help="The number of most frequent terms/IDs is applied per row of the input table." />
    </inputs>
    <outputs>
        <data format="tabular" name="output" />
    </outputs>
    <tests>
        <!-- Terms from two categories, default settings. -->
        <test>
            <param name="input" value="pubmed_by_queries_output" ftype="tabular"/>
            <param name="categories" value="Gene,Mutation"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same input with the byid option switched on. -->
        <test>
            <param name="input" value="pubmed_by_queries_output" ftype="tabular"/>
            <param name="categories" value="Gene,Disease"/>
            <param name="byid" value="True"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same input with a smaller number of terms to extract. -->
        <test>
            <param name="input" value="pubmed_by_queries_output" ftype="tabular"/>
            <param name="categories" value="Gene,Disease"/>
            <param name="number" value="5"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

**What it does**

The tool uses all PMIDs per row and extracts "Gene", "Disease", "Mutation", "Chemical" and "Species" terms of the corresponding abstracts,
using PubTator annotations. The user can choose from which categories terms should be extracted. The extracted terms are united in one
large binary matrix, with 0 = term not present in abstracts of that row and 1 = term present in abstracts of that row.
The user can decide if the scientific terms should be extracted and used as they are or if they should be grouped by their
gene IDs / MeSH IDs (several terms are often grouped into one ID). The user can also specify a number of most frequent words to extract per row.

- Input file:

  Output of 'abstracts_by_pmids' tool, or tab-delimited table with columns containing PMIDs.
  The names of the PMID columns should start with "PMID", e.g. "PMID_1", "PMID_2" etc.

- Output file:

  Binary matrix in which each column represents one of the extracted terms.

-----

**Example**

- Input table:

  | PMID_1   | PMID_2   | PMID_3
  | 33565071 | 33531663 | 33528079
  | 33377604 | 33334860 | 33277917

- Extract of output table:

  | egfr | hormone | tp53 | scn8a | cacna1a | grin2a
  | 1    | 0       | 1    | 0     | 1       | 0
  | 1    | 1       | 1    | 1     | 0       | 1


    ]]></help>
    <expand macro="citations"/>
</tool>