Mercurial > repos > artbio > pathifier
diff pathifier.xml @ 0:fec313f5c889 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/pathifier commit b94cfc7bf8df30aa8e9249b75ea31332ee2bada1"
author | artbio |
---|---|
date | Mon, 12 Apr 2021 09:55:24 +0000 |
parents | |
children | 0960bd1161fa |
line wrap: on
line diff
<tool id="pathifier" name="Pathifier" version="1.0.1">
    <!--
        Galaxy wrapper for the Bioconductor "pathifier" package.
        It runs the pathifier.R script located in the tool directory on an
        expression matrix and a GMT gene-set file, and returns the Pathway
        Deregulation Scores (PDS), a PDF of plots and, on request, the
        Pathifier log file and the Pathifier S4 result object.
    -->
    <description>: Quantify deregulation of pathways in cancer</description>
    <!-- Package versions are pinned together with their conda build strings. -->
    <requirements>
        <requirement type="package" version="1.6.2=r35h6115d3f_0">r-optparse</requirement>
        <requirement type="package" version="1.22.0=r351_0">bioconductor-pathifier</requirement>
        <requirement type="package" version="1.0.12=r351h6115d3f_0">r-pheatmap</requirement>
        <requirement type="package" version="0.3_41=r351h6115d3f_0">r-scatterplot3d</requirement>
        <requirement type="package" version="0.4.7=r35h6115d3f_0">r-circlize</requirement>
    </requirements>
    <!-- Any non-zero exit code of the R script is reported as a fatal tool error. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>
    <!--
        Cheetah command template. Every substituted value is single-quoted,
        including the script path, so that paths containing spaces survive
        shell word splitting. The two "normals" options are only emitted when
        the user declared that normal (non cancer) samples are present.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/pathifier.R'
            --exp '$input'
            --sep '$input_sep'
            --genes '$genes'

            #if $reference.is_normal == "Yes":
                --is_normal 'TRUE'
                --normals '$reference.normals'
            #end if

            --max_stability '$max_stability'
            --attempts '$attempts'
            --min_std '$min_std'
            --min_exp '$min_exp'

            --heatmap_cluster_cells '$heatmap_cluster_cells'
            --heatmap_cluster_pathways '$heatmap_cluster_pathways'
            --heatmap_show_cell_labels '$heatmap_show_cell_labels'
            --heatmap_show_pathway_labels '$heatmap_show_pathway_labels'

            --pds '$pds'
            --logfile '$logfile'
            --plot '$plot'
            --rdata '$rdatafile'
    ]]></command>
    <inputs>
        <!-- Expression matrix and the separator used between its columns. -->
        <param name="input" type="data" format="txt,tabular" label="expression data"/>
        <param name="input_sep" type="select" label="Input column separator">
            <option value="tab" selected="true">Tabs</option>
            <option value=",">Comma</option>
        </param>
        <param name="genes" type="data" format="txt" label="Gene sets Pathways"
               help="Must be in gmt format (one pathway per line : Name, description, genes (one by column), tab separated)" />
        <!-- The sample status table is only requested when normal samples are declared. -->
        <conditional name="reference">
            <param name="is_normal" label="Do you have non cancer transcriptomes in your data set ?" type="boolean" truevalue="Yes" falsevalue="" checked="false"
                   help="If set the starting curve depends on the matrix of points with in a certain row order (first 'normal' then 'cancer' samples), otherwise the first principal component is used. See help for more information"/>
            <when value="Yes">
                <param name="normals" type="data" format="tabular" label="Sample status"
                       help="A two-column data frame, first column contains data labels, second column the levels of sample status : 1 = Healthy, 0 = Tumor (no header)" />
            </when>
            <when value="">
            </when>
        </conditional>
        <param name="max_stability" label="Throw away components leading to low stability of sampling noise" type="boolean" truevalue="TRUE"
               falsevalue="FALSE" checked="true" />
        <param name="attempts" type="integer" label="Number of runs to determine stability" value="100"/>
        <!-- Text (not float) parameters: both also accept the keyword 'data'. -->
        <param name="min_std" type="text" value="0.4" label="Minimum of standard deviation to filter out low variable genes"
               help="Use 'data' to use the minimum standard deviation of your data" />
        <param name="min_exp" type="text" value="4" label="Minimum of gene expression to filter out low variable genes"
               help="Use 'data' to use the minimum expression of your data" />
        <!-- Heatmap rendering switches, forwarded to the script as TRUE/FALSE. -->
        <param name="heatmap_cluster_cells" label="Cluster samples on heatmap" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" />
        <param name="heatmap_cluster_pathways" label="Cluster pathways on heatmap" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" />
        <param name="heatmap_show_cell_labels" label="Show sample labels on heatmap" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" />
        <param name="heatmap_show_pathway_labels" label="Show pathway labels on heatmap" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" />
        <!-- The two selects below are read by the output filters to hide optional datasets. -->
        <param name="log" type="select" label="Return log file of pathifier">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
        <param name="rdata" type="select" label="Return Pathifier S4 object">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
    </inputs>
    <outputs>
        <data name="pds" format="tabular" label="Pathifier Deregulation Score (PDS) of ${on_string}" />
        <data name="logfile" format="txt" label="Pathifier log file of ${on_string}">
            <filter>log == 'yes'</filter>
        </data>
        <data name="plot" format="pdf" label="Pathifier vizualization of ${on_string}" />
        <data name="rdatafile" format="rdata" label="Pathifier S4 object of ${on_string}">
            <!-- The filter must name the "rdata" select defined in the inputs section. -->
            <filter>rdata == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- With a sample status table and the log file requested. -->
        <test>
            <param name="input" value="sheffer.tsv" ftype="tabular"/>
            <param name="genes" value="kegg_pathways.gmt" ftype="txt" />
            <param name="is_normal" value="True" />
            <param name="normals" value="normals.tsv" ftype="tabular" />
            <param name="log" value="yes" />
            <param name="attempts" value="100" />
            <output name="logfile" file="sheffer.kegg.log" ftype="txt" compare="sim_size" />
            <output name="pds" file="sheffer.kegg.tsv" ftype="tabular"/>
            <output name="plot" file="plot.pdf" ftype="pdf" compare="sim_size" />
        </test>
        <!-- Without normal samples, with data-driven thresholds and the S4 object requested. -->
        <test>
            <param name="input" value="sheffer_noref.tsv" ftype="tabular"/>
            <param name="genes" value="kegg_pathways.gmt" ftype="txt" />
            <param name="is_normal" value="" />
            <param name="log" value="no" />
            <param name="rdata" value="yes" />
            <param name="attempts" value="50" />
            <param name="min_exp" value="data" />
            <param name="min_std" value="data" />
            <output name="pds" file="sheffer.kegg_noref.tsv" ftype="tabular"/>
            <output name="rdatafile" file="sheffer.kegg_noref.rdata" ftype="rdata" compare="sim_size" />
            <output name="plot" file="plot_noref.pdf" ftype="pdf" compare="sim_size" />
        </test>
    </tests>
    <help>

**What it does**

Pathifier is an algorithm that infers pathway deregulation scores for each (tumor) sample on the basis
of expression data. This score is determined, in a context-specific manner, for every particular dataset
and type of cancer that is being investigated. The algorithm transforms gene-level information into
pathway-level information, generating a compact and biologically relevant representation of each sample.

For each pathway analysed, the transcriptome datasets are plotted in the gene pathway space using a
Principal Component analysis (PCA) and a principal curve is regressed from these points. All transcriptomes
are projected on the nearest point of this curve. Pathifier finally computes a score which corresponds to
the distance (normalized to 1) of each point (transcriptome) to the curve origin.

**Inputs**

* a matrix of n columns of observations (generally RNAseq experiments) and k rows of variables (generally k genes).
* a Gene Matrix Transposed file (GMT format) where each row represents a gene set :

  * first column : gene set name (pathway name)
  * second : description of gene set
  * third and + : list of genes that compose the gene set tab-separated

* (Optional) a two column table with no header, to describe transcriptome status (Tumor or not) :

  * first column : sample labels
  * second : levels of sample status : 1 = Healthy, 0 = Tumor

**Outputs**

* Table of Pathway Deregulation Scores : one by pathway (column) and by transcriptome (row)
* Visualization of PDS (pdf) :

  * Principal curve of different PDS
  * Heatmap of PDS that allows (through clustering) to see pattern in pathway deregulation

* (Optional) Log file of Pathifier algorithm
* (Optional) Pathifier S4 object which contains all information and results generated by Pathifier, for each pathway :

  * `scores` : PDS scores
  * `genesinpathway` : Gene identifiers in each pathway
  * `newmeanstd`
  * `origmeanstd`
  * `pathwaysize` : Number of genes retained in pathway
  * `curves` : Coordinates of transcriptomes projected on the principal curve
  * `curves_order` : Order of transcriptomes along the principal curve
  * `z` : z-scores matrix
  * `compin`
  * `xm`
  * `xs`
  * `center`
  * `rot`
  * `pctaken` : Number of principal components retained for pathifier analysis
  * `samplings`
  * `sucess` : List of pathway indices that passed Pathifier filters (more info in log file)
  * `logfile` : Name of logfile

    </help>
    <citations>
        <citation type="bibtex">@Manual{,
            title = {{pathifier}: Quantify deregulation of pathways in cancer},
            author = {Yotam Drier},
            year = {2013-06-27},
            note = {R package version 1.22.0},
            url = {https://git.bioconductor.org/packages/pathifier},
            }
        </citation>
        <citation type="bibtex">@article {Drier6388,
            author = {Drier, Yotam and Sheffer, Michal and Domany, Eytan},
            title = {Pathway-based personalized analysis of cancer},
            volume = {110},
            number = {16},
            pages = {6388--6393},
            year = {2013},
            doi = {10.1073/pnas.1219651110},
            publisher = {National Academy of Sciences},
            issn = {0027-8424},
            URL = {https://www.pnas.org/content/110/16/6388},
            eprint = {https://www.pnas.org/content/110/16/6388.full.pdf},
            journal = {Proceedings of the National Academy of Sciences}
            }
        </citation>
    </citations>
</tool>