Mercurial > repos > metaboflow_cam > ionflow
diff ionflow/ionflow.xml @ 0:3b461dc9568b draft default tip
Uploaded
author | metaboflow_cam |
---|---|
date | Mon, 09 Aug 2021 09:41:22 +0000 |
parents | |
children |
line wrap: on
line diff
<!--
wl-10-08-2020, Mon: commence
wl-24-08-2020, Mon: first version
wl-16-11-2020, Mon: second version
wl-23-03-2021, Tue: third version
wl-08-06-2021, Tue: fourth version: new stuff from Jacopo
-->

<!--
  Galaxy tool wrapper for the IonFlow ionomics pipeline.

  The wrapper runs ionflow.R (shipped in the tool directory) once per job,
  forwarding every input parameter and every output dataset path as a named
  command-line option. Requirements and stdio handling come from macros.xml.

  NOTE(review): the id "ionfow" looks like a typo for "ionflow". It is kept
  unchanged here because the id identifies the tool to existing installs;
  rename only together with a migration plan.
-->
<tool id="ionfow" name="IonFlow" version="0.4.0">
  <description>
    Pipeline for processing and analysis of ionomics data
  </description>
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <expand macro="stdio" />

  <!-- =============================================================== -->
  <!-- Command line. The script path is quoted so that a tool directory
       containing spaces does not split the argument. Options that only
       apply to one branch of a conditional are emitted only for that
       branch. -->
  <command detect_errors="exit_code">
    <![CDATA[

      Rscript '${__tool_directory__}/ionflow.R'
        ## Input
        --ion_file '$ion_file'
        --var_id '$pre.var_id'
        --batch_id '$pre.batch_id'
        --data_id '$pre.data_id'
        --method_norm '$pre.method_norm'
        --batch_control '$pre.batch.batch_control'
        #if $pre.batch.batch_control=='yes'
          --control_lines '$pre.batch.control_lines'
          --control_use '$pre.batch.control_use'
        #end if
        --method_outliers '$pre.method_outliers'
        --thres_outl '$pre.thres_outl'
        --stand_method '$pre.stand.stand_method'
        #if $pre.stand.stand_method=='custom'
          --std_file '$pre.stand.std_file'
        #end if
        --thres_symb '$pre.thres_symb'

        ## Exploratory analysis
        --thres_ion_corr '$expl.thres_ion_corr'

        ## Clustering analysis
        --min_clust_size '$clus.min_clust_size'
        --h_tree '$clus.h_tree'
        --filter_zero_string '$clus.filter_zero_string'

        ## Enrichment analysis
        --pval '$enri.pval'
        --min_count '$enri.min_count'
        --ont '$enri.ont'
        --annot_pkg '$enri.annot_pkg'

        ## Network analysis
        --method_corr '$net.method_corr'
        --thres_corr '$net.thres_corr'

        ## output: pre-processing
        --pre_proc_pdf '$pre_proc_pdf'
        --df_stats_out '$df_stats_out'
        --outl_out '$outl_out'
        --data_wide_out '$data_wide_out'
        --data_wide_symb_out '$data_wide_symb_out'

        ## output: exploratory analysis
        --expl_anal_pdf '$expl_anal_pdf'

        ## output: clustering analysis
        --clus_anal_pdf '$clus_anal_pdf'

        ## output: enrichment analysis
        --go_en_out '$go_en_out'

        ## output: network analysis
        --gene_net_pdf '$gene_net_pdf'
        --imbe_out '$imbe_out'
    ]]>
  </command>

  <!-- =============================================================== -->
  <!-- Inputs: one data file plus one section per pipeline stage. -->
  <inputs>
    <param name="ion_file" type="data" format="csv"
           label="Ion data table"
           help="Ion data table with columns of Ions and meta information." />

    <!-- start of pre -->
    <section name="pre" title="Pre Processing" >

      <param name="var_id" type="integer" value="1"
             label="Specify variable column index of input data"
             help="Indicate which column will be the variable (ORF or SYMBOL)." />

      <param name="batch_id" type="integer" value="3"
             label="Specify batch ID column index of input data"
             help="Indicate which column will be batch ID." />

      <param name="data_id" type="integer" value="5"
             label="Specify data start column index of input data"
             help="Indicate which column will be the start of data matrix." />

      <param name="method_norm" type="select"
             label="Select a method for batch correction">
        <option value="median" selected="true">Median</option>
        <option value="median+std">Median plus std</option>
        <option value="none">None</option>
      </param>

      <!-- batch control -->
      <conditional name="batch">
        <param name="batch_control" type="select"
               label="Use control lines for batch correction or not" >
          <option value="yes" selected="true">Yes</option>
          <option value="no">No</option>
        </param>

        <when value="yes">
          <!-- Only ASCII letters and digits pass the sanitizer. -->
          <param name="control_lines" type="text" value="BY4741"
                 label="Specify batch control lines (rows)">
            <sanitizer>
              <valid initial="string.ascii_letters,string.digits"></valid>
            </sanitizer>
          </param>

          <param name="control_use" type="select"
                 label="Select lines for batch correction">
            <option value="control">Use control lines for batch correction</option>
            <option value="all" selected="true">Use all lines for batch correction</option>
            <option value="control.out">Use all lines except control lines for batch correction</option>
          </param>
        </when>

        <when value="no" />
      </conditional>

      <param name="method_outliers" type="select"
             label="Select a method for outlier detection">
        <option value="IQR">IQR</option>
        <option value="mad">MAD</option>
        <option value="log.FC.dist" selected="true">log FC dist</option>
        <option value="none">none</option>
      </param>

      <param name="thres_outl" type="float" value="3.0"
             label="Specify outlier detection threshold" />

      <!-- standardisation method -->
      <conditional name="stand">
        <param name="stand_method" type="select"
               label="Select a method for standardisation">
          <option value="std" selected="true">STD</option>
          <option value="mad">MAD</option>
          <option value="custom">Custom</option>
        </param>

        <!-- Every select option gets a "when" branch; only "custom" needs
             an extra input. -->
        <when value="std" />
        <when value="mad" />
        <when value="custom">
          <param name="std_file" type="data" format="tabular"
                 label="STD file"
                 help="A data matrix with only two columns. The first column holds the ion names and the second one the std values." />
        </when>
      </conditional>

      <param name="thres_symb" type="float" value="2.0"
             label="Specify symbolisation threshold" />

    </section>
    <!-- end of pre -->

    <section name="expl" title="Exploratory analysis" >
      <!-- Bounds match the range stated in the label. -->
      <param name="thres_ion_corr" type="float" value="0.15" min="0" max="1"
             label="Threshold for Ion correlation (0 - 1)" />
    </section>

    <section name="clus" title="Clustering analysis" >
      <!-- NOTE(review): min_clust_size is declared as float although the
           label describes a count; confirm against ionflow.R before
           changing the type. -->
      <param name="min_clust_size" type="float" value="10.0"
             label="Specify minimal cluster center number" />
      <param name="h_tree" type="float" value="0.0"
             label="Cutting height for hierarchical clustering" />
      <param name="filter_zero_string" type="boolean" truevalue="True"
             falsevalue="False" checked="true"
             label="Filter the zero string?" />
    </section>

    <section name="enri" title="Enrichment analysis" >
      <param name="pval" type="float" value="0.05" min="0" max="1"
             label="Specify p-value threshold for enrichment analysis" />
      <!-- NOTE(review): min_count is declared as float although the label
           describes a count; confirm against ionflow.R before changing. -->
      <param name="min_count" type="float" value="3.0"
             label="Minimal count number for enrichment analysis" />
      <param name="ont" type="select"
             label="Select gene ontology for GO Terms">
        <option value="BP" selected="true">BP</option>
        <option value="MF">MF</option>
        <option value="CC">CC</option>
      </param>
      <param name="annot_pkg" type="select"
             label="Select an annotation package">
        <option value="org.Sc.sgd.db" selected="true">Yeast(org.Sc.sgd.db)</option>
        <option value="org.Hs.eg.db">Human(org.Hs.eg.db)</option>
        <option value="org.Mm.eg.db">Mouse(org.Mm.eg.db)</option>
      </param>
    </section>

    <section name="net" title="Network analysis" >
      <param name="method_corr" type="select"
             label="Select a method for similarity measure">
        <option value="pearson">Pearson</option>
        <option value="spearman">Spearman</option>
        <option value="kendall">Kendall</option>
        <option value="cosine" selected="true">Cosine</option>
        <option value="mahal_cosine">Mahalanobis Cosine</option>
        <option value="hybrid_mahal_cosine">Hybrid Mahalanobis Cosine</option>
      </param>
      <param name="thres_corr" type="float" value="0.7" min="0" max="1"
             label="Specify similarity threshold (0 - 1)" />
    </section>

  </inputs>


  <!-- =============================================================== -->
  <!-- Outputs: each dataset name matches the command-line option that
       receives its path. -->
  <outputs>
    <data format="pdf" name="pre_proc_pdf"
          label="Pre-processing plots for Ions on ${on_string}" />
    <data format="tabular" name="df_stats_out"
          label="Statistical summary of data set on ${on_string}"/>
    <data format="tabular" name="outl_out"
          label="Outlier table on ${on_string}"/>
    <data format="tabular" name="data_wide_out"
          label="Pre-processed data in wide format on ${on_string}"/>
    <data format="tabular" name="data_wide_symb_out"
          label="Symbolization data in wide format on ${on_string}"/>
    <data format="pdf" name="expl_anal_pdf"
          label="Exploratory analysis plots for Ions on ${on_string}" />
    <data format="pdf" name="clus_anal_pdf"
          label="Clustering analysis plots for Ions on ${on_string}" />
    <data format="tabular" name="go_en_out"
          label="GO enrichment table on ${on_string}"/>
    <data format="pdf" name="gene_net_pdf"
          label="Gene network plots on ${on_string}" />
    <data format="tabular" name="imbe_out"
          label="Impact and betweenness table on ${on_string}"/>
  </outputs>

  <!-- =============================================================== -->
  <!-- One regression test using the default parameter values. PDF outputs
       are compared by size, tabular outputs by diff. -->
  <tests>
    <test>
      <param name="ion_file" value="Dataset_IonFlow_Ionome_KO_short.csv" />
      <param name="var_id" value="1" />
      <param name="batch_id" value="3" />
      <param name="data_id" value="5" />
      <param name="method_norm" value="median" />
      <param name="batch_control" value="yes" />
      <param name="control_lines" value="BY4741" />
      <param name="control_use" value="all" />
      <param name="method_outliers" value="log.FC.dist" />
      <param name="thres_outl" value="3.0" />
      <param name="stand_method" value="std" />
      <param name="thres_symb" value="2" />
      <param name="thres_ion_corr" value="0.15" />
      <param name="min_clust_size" value="10.0" />
      <param name="h_tree" value="0.0" />
      <param name="filter_zero_string" value="true" />
      <param name="pval" value="0.05" />
      <param name="min_count" value="3" />
      <param name="ont" value="BP" />
      <param name="annot_pkg" value="org.Sc.sgd.db" />
      <param name="method_corr" value="cosine" />
      <param name="thres_corr" value="0.7" />
      <output name="pre_proc_pdf" file="res/pre_proc.pdf" compare="sim_size" />
      <output name="df_stats_out" file="res/df_stats.tsv" compare="diff" />
      <output name="outl_out" file="res/outl.tsv" compare="diff" />
      <output name="data_wide_out" file="res/data_wide.tsv" compare="diff" />
      <output name="data_wide_symb_out" file="res/data_wide_symb.tsv" compare="diff" />
      <output name="expl_anal_pdf" file="res/expl_anal.pdf" compare="sim_size" />
      <output name="clus_anal_pdf" file="res/clus_anal.pdf" compare="sim_size" />
      <output name="go_en_out" file="res/go_en.tsv" compare="diff" />
      <output name="gene_net_pdf" file="res/gene_net.pdf" compare="sim_size" />
      <output name="imbe_out" file="res/impact_betweenness.tsv" compare="diff" />
    </test>
  </tests>

  <!-- =============================================================== -->
<help>
IonFlow Pipeline
=================

Description
-----------

This galaxy tool wraps R package IonFlow_ with modification to process
ionomics data to aid reproducible data sharing and big data initiatives.

The pipeline includes:

Pre-Processing
  This procedure performs batch correction with or without control lines,
  outlier detection and data standardisation. The processed concentration
  data and a symbolisation profile data are returned for further analysis.

Exploratory Analysis
  This procedure performs correlation analysis and PCA analysis in terms of
  ions. The heatmap with hierarchical clustering and network graph are based
  on the correlation analysis.

Clustering Analysis
  This step performs hierarchical clustering based on symbolised profile.
  The selected cluster centres (control by the threshold of minimal cluster
  centre number) are applied to enrichment and network analysis.

Enrichment Analysis
  This step uses the clustering results for enrichment analysis. It currently
  supports three annotation packages:

  - Yeast(org.Sc.sgd.db)
  - Human(org.Hs.eg.db)
  - Mouse(org.Mm.eg.db)

Network Analysis
  This part uses hierarchical clustering of the symbolised profile
  for network analysis. Genes with correlation coefficient larger than a
  threshold are then used. Some network analysis stats such as impact and
  betweenness are also returned.

.. _IonFlow: https://github.com/AlinaPeluso/MetaboFlow

Inputs
------

Ionomics data
~~~~~~~~~~~~~

The input file is an ionomics data table in CSV format. The following is
an example with the first two columns of knockout name and batch ID and
other columns are ion data. To use this input data in ``Pre-processing`` , the
user must indicate ``var_id`` as ``1`` (``Knockout``), ``batch_id`` as ``2``
(``Batch_ID``) and ``data_id`` as ``3``. If the first column contains the
batch control lines, ``control_lines`` should name them. For
example, if ``YDL227C`` will be used as batch control, ``control_lines =
"YDL227C"``.


  +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
  | Knockout | Batch_ID | Ca      | Cd    | Cu    | Fe     | K        | Mg      | Mo    | Na      | Ni    | P        | S        |
  +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
  | YDL227C  | 14       | 59.549  | 0.953 | 2.202 | 10.942 | 3448.070 | 693.992 | 1.603 | 259.816 | 1.573 | 4963.315 | 556.397  |
  +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
  | YDL227C  | 14       | 62.258  | 0.927 | 2.067 | 26.262 | 3493.741 | 705.008 | 2.691 | 273.640 | 4.443 | 4874.101 | 553.229  |
  +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
  | YDL227C  | 14       | 65.075  | 0.875 | 2.048 | 10.244 | 3317.611 | 691.411 | 1.878 | 278.167 | 1.448 | 4608.300 | 535.609  |
  +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
  | YDL227C  | 14       | 56.886  | 0.985 | 2.203 | 9.206  | 3330.854 | 702.734 | 1.396 | 268.609 | 1.640 | 5119.736 | 546.230  |
  +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+

|

Custom STD data
~~~~~~~~~~~~~~~

Standard deviation values can be provided by the user if standardisation
method ``stand_method`` in pre-processing procedure is selected as
``custom``. The user defined std file has tabular format such as:

  +------+----------+
  | Ion  | sd       |
  +------+----------+
  | Ca   | 0.150    |
  +------+----------+
  | Fe   | 0.163    |
  +------+----------+
  | K    | 0.094    |
  +------+----------+
  | Mg   | 0.059    |
  +------+----------+
  | Na   | 0.107    |
  +------+----------+
  | Ni   | 0.078    |
  +------+----------+
  | Zn   | 0.067    |
  +------+----------+

|

Outputs
-------

Pre-processing
~~~~~~~~~~~~~~

The output includes:

- A PDF file for the plots: dot-plot with ion vs batch and distribution
  plot of ions.
- A tabular file for statistical summary of data set
- A tabular file for outlier table
- A tabular file for processed data set
- A tabular file for the symbolisation data set

Exploratory analysis
~~~~~~~~~~~~~~~~~~~~

A single PDF file with plots:

- Correlation map
- Heatmap with dendrogram
- PCA plot
- Correlation network graph

Clustering analysis
~~~~~~~~~~~~~~~~~~~

A single PDF file with clustering plots.

Enrichment analysis
~~~~~~~~~~~~~~~~~~~

A tabular file for GO Terms enrichment table.

Network analysis
~~~~~~~~~~~~~~~~

Two files are returned:

- A PDF file for plots

  - network graph
  - impact scatter plot

- A tabular file for impact and betweenness table


</help>
  <citations>
  </citations>
</tool>