Mercurial > repos > metaboflow_cam > ionflow
view ionflow/ionflow.xml @ 0:3b461dc9568b draft default tip
Uploaded
author | metaboflow_cam |
---|---|
date | Mon, 09 Aug 2021 09:41:22 +0000 |
parents | |
children |
line wrap: on
line source
<!--
  IonFlow Galaxy tool wrapper: runs ionflow.R on an ion data table.

  Change log:
    wl-10-08-2020, Mon: commence
    wl-24-08-2020, Mon: first version
    wl-16-11-2020, Mon: second version
    wl-23-03-2021, Tue: third version
    wl-08-06-2021, Tue: fourth version: new stuff from Jacopo
-->
<!-- NOTE(review): the id "ionfow" looks like a typo for "ionflow". It is kept
     unchanged on purpose, because the id is the identifier that saved
     workflows and histories use to refer to this tool. -->
<tool id="ionfow" name="IonFlow" version="0.4.0">
  <description>Pipeline for processing and analysis of ionomics data</description>

  <!-- The "requirements" and "stdio" macros are defined in macros.xml. -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <expand macro="stdio" />

  <!-- =============================================================== -->
  <!-- Command template (Cheetah). Lines starting with "##" are Cheetah
       comments and "#if" / "#end if" are Cheetah directives; both are
       line-oriented, so each must stay on its own line. -->
  <command detect_errors="exit_code">
    <![CDATA[
      Rscript '${__tool_directory__}/ionflow.R'
        ## Input
        --ion_file '$ion_file'
        --var_id '$pre.var_id'
        --batch_id '$pre.batch_id'
        --data_id '$pre.data_id'
        --method_norm '$pre.method_norm'
        --batch_control '$pre.batch.batch_control'
        #if $pre.batch.batch_control=='yes'
          --control_lines '$pre.batch.control_lines'
          --control_use '$pre.batch.control_use'
        #end if
        --method_outliers '$pre.method_outliers'
        --thres_outl '$pre.thres_outl'
        --stand_method '$pre.stand.stand_method'
        #if $pre.stand.stand_method=='custom'
          --std_file '$pre.stand.std_file'
        #end if
        --thres_symb '$pre.thres_symb'

        ## Exploratory analysis
        --thres_ion_corr '$expl.thres_ion_corr'

        ## Clustering analysis
        --min_clust_size '$clus.min_clust_size'
        --h_tree '$clus.h_tree'
        --filter_zero_string '$clus.filter_zero_string'

        ## Enrichment analysis
        --pval '$enri.pval'
        --min_count '$enri.min_count'
        --ont '$enri.ont'
        --annot_pkg '$enri.annot_pkg'

        ## Network analysis
        --method_corr '$net.method_corr'
        --thres_corr '$net.thres_corr'

        ## output: pre-processing
        --pre_proc_pdf '$pre_proc_pdf'
        --df_stats_out '$df_stats_out'
        --outl_out '$outl_out'
        --data_wide_out '$data_wide_out'
        --data_wide_symb_out '$data_wide_symb_out'

        ## output: exploratory analysis
        --expl_anal_pdf '$expl_anal_pdf'

        ## output: clustering analysis
        --clus_anal_pdf '$clus_anal_pdf'

        ## output: enrichment analysis
        --go_en_out '$go_en_out'

        ## output: network analysis
        --gene_net_pdf '$gene_net_pdf'
        --imbe_out '$imbe_out'
    ]]>
  </command>

  <!-- =============================================================== -->
  <!-- Inputs: one data file plus one section per pipeline stage. The section
       and conditional names (pre, batch, stand, expl, clus, enri, net) are
       referenced by the command template above. -->
  <inputs>
    <param name="ion_file" type="data" format="csv"
           label="Ion data table"
           help="Ion data table with columns of Ions and meta information." />

    <!-- start of pre -->
    <section name="pre" title="Pre Processing">
      <param name="var_id" type="integer" value="1"
             label="Specify variable column index of input data"
             help="Indicate which column will be the variable (ORF or SYMBOL)." />
      <param name="batch_id" type="integer" value="3"
             label="Specify batch ID column index of input data"
             help="Indicate which column will be batch ID." />
      <param name="data_id" type="integer" value="5"
             label="Specify data start column index of input data"
             help="Indicate which column will be the start of data matrix." />

      <param name="method_norm" type="select"
             label="Select a method for batch correction">
        <option value="median" selected="true">Median</option>
        <option value="median+std">Median plus std</option>
        <option value="none">None</option>
      </param>

      <!-- batch control: the control line options are only shown, and only
           passed to the script, when batch_control is "yes" -->
      <conditional name="batch">
        <param name="batch_control" type="select"
               label="Use control lines for batch correction or not">
          <option value="yes" selected="true">Yes</option>
          <option value="no">No</option>
        </param>
        <when value="yes">
          <param name="control_lines" type="text" value="BY4741"
                 label="Specify batch control lines (rows)">
            <!-- only ASCII letters and digits are accepted -->
            <sanitizer>
              <valid initial="string.ascii_letters,string.digits"></valid>
            </sanitizer>
          </param>
          <param name="control_use" type="select"
                 label="Select lines for batch correction">
            <option value="control">Use control lines for batch correction</option>
            <option value="all" selected="true">Use all lines for batch correction</option>
            <option value="control.out">Use all lines except control lines for batch correction</option>
          </param>
        </when>
        <when value="no">
        </when>
      </conditional>

      <param name="method_outliers" type="select"
             label="Select a method for outlier detection">
        <option value="IQR">IQR</option>
        <option value="mad">MAD</option>
        <option value="log.FC.dist" selected="true">log FC dist</option>
        <option value="none">none</option>
      </param>
      <param name="thres_outl" type="float" value="3.0"
             label="Specify outlier detection threshold" />

      <!-- standardisation method: only "custom" needs an extra input; the
           other options get explicit empty cases so every select option has
           a matching when block -->
      <conditional name="stand">
        <param name="stand_method" type="select"
               label="Select a method for standardisation">
          <option value="std" selected="true">STD</option>
          <option value="mad">MAD</option>
          <option value="custom">Custom</option>
        </param>
        <when value="std" />
        <when value="mad" />
        <when value="custom">
          <param name="std_file" type="data" format="tabular"
                 label="STD file"
                 help="A data matrix with only two columns. The first column is the names of ion and the second one is std values." />
        </when>
      </conditional>

      <param name="thres_symb" type="float" value="2.0"
             label="Specify symbolisation threshold" />
    </section>
    <!-- end of pre -->

    <section name="expl" title="Exploratory analysis">
      <param name="thres_ion_corr" type="float" value="0.15"
             label="Threshold for Ion correlation (0 - 1)" />
    </section>

    <section name="clus" title="Clustering analysis">
      <param name="min_clust_size" type="float" value="10.0"
             label="Specify minimal cluster center number" />
      <param name="h_tree" type="float" value="0.0"
             label="Cutting height for hierarchical clustering" />
      <param name="filter_zero_string" type="boolean"
             truevalue="True" falsevalue="False" checked="True"
             label="Filter the zero string?" />
    </section>

    <section name="enri" title="Enrichment analysis">
      <param name="pval" type="float" value="0.05"
             label="Specify p-value threshold for enrichment analysis" />
      <param name="min_count" type="float" value="3.0"
             label="Minimal count number for enrichment analysis" />
      <param name="ont" type="select"
             label="Select gene ontology for GO Terms">
        <option value="BP" selected="true">BP</option>
        <option value="MF">MF</option>
        <option value="CC">CC</option>
      </param>
      <param name="annot_pkg" type="select"
             label="Select an annotation package">
        <option value="org.Sc.sgd.db" selected="true">Yeast(org.Sc.sgd.db)</option>
        <option value="org.Hs.eg.db">Human(org.Hs.eg.db)</option>
        <option value="org.Mm.eg.db">Mouse(org.Mm.eg.db)</option>
      </param>
    </section>

    <section name="net" title="Network analysis">
      <param name="method_corr" type="select"
             label="Select a method for similarity measure">
        <option value="pearson">Pearson</option>
        <option value="spearman">Spearman</option>
        <option value="kendall">Kendall</option>
        <option value="cosine" selected="true">Cosine</option>
        <option value="mahal_cosine">Mahalanobis Cosine</option>
        <option value="hybrid_mahal_cosine">Hybrid Mahalanobis Cosine</option>
      </param>
      <param name="thres_corr" type="float" value="0.7" min="0" max="1"
             label="Specify similarity threshold (0 - 1)" />
    </section>
  </inputs>

  <!-- =============================================================== -->
  <!-- Outputs: each name matches an output option of the command template. -->
  <outputs>
    <!-- pre-processing -->
    <data format="pdf" name="pre_proc_pdf"
          label="Pre-processing plots for Ions on ${on_string}" />
    <data format="tabular" name="df_stats_out"
          label="Statistical summary of data set on ${on_string}" />
    <data format="tabular" name="outl_out"
          label="Outlier table on ${on_string}" />
    <data format="tabular" name="data_wide_out"
          label="Pre-processed data in wide format on ${on_string}" />
    <data format="tabular" name="data_wide_symb_out"
          label="Symbolization data in wide format on ${on_string}" />
    <!-- exploratory analysis -->
    <data format="pdf" name="expl_anal_pdf"
          label="Exploratory analysis plots for Ions on ${on_string}" />
    <!-- clustering analysis -->
    <data format="pdf" name="clus_anal_pdf"
          label="Clustering analysis plots for Ions on ${on_string}" />
    <!-- enrichment analysis -->
    <data format="tabular" name="go_en_out"
          label="GO enrichment table on ${on_string}" />
    <!-- network analysis -->
    <data format="pdf" name="gene_net_pdf"
          label="Gene network plots on ${on_string}" />
    <data format="tabular" name="imbe_out"
          label="Impact and betweenness table on ${on_string}" />
  </outputs>

  <!-- =============================================================== -->
  <!-- Tests: a single run with the default settings. PDF outputs are compared
       by size (sim_size), tabular outputs by content (diff). -->
  <tests>
    <test>
      <param name="ion_file" value="Dataset_IonFlow_Ionome_KO_short.csv" />
      <param name="var_id" value="1" />
      <param name="batch_id" value="3" />
      <param name="data_id" value="5" />
      <param name="method_norm" value="median" />
      <param name="batch_control" value="yes" />
      <param name="control_lines" value="BY4741" />
      <param name="control_use" value="all" />
      <param name="method_outliers" value="log.FC.dist" />
      <param name="thres_outl" value="3.0" />
      <param name="stand_method" value="std" />
      <param name="thres_symb" value="2" />
      <param name="thres_ion_corr" value="0.15" />
      <param name="min_clust_size" value="10.0" />
      <param name="h_tree" value="0.0" />
      <param name="filter_zero_string" value="TRUE" />
      <param name="pval" value="0.05" />
      <param name="min_count" value="3" />
      <param name="ont" value="BP" />
      <param name="annot_pkg" value="org.Sc.sgd.db" />
      <param name="method_corr" value="cosine" />
      <param name="thres_corr" value="0.7" />
      <output name="pre_proc_pdf" file="res/pre_proc.pdf" compare="sim_size" />
      <output name="df_stats_out" file="res/df_stats.tsv" compare="diff" />
      <output name="outl_out" file="res/outl.tsv" compare="diff" />
      <output name="data_wide_out" file="res/data_wide.tsv" compare="diff" />
      <output name="data_wide_symb_out" file="res/data_wide_symb.tsv" compare="diff" />
      <output name="expl_anal_pdf" file="res/expl_anal.pdf" compare="sim_size" />
      <output name="clus_anal_pdf" file="res/clus_anal.pdf" compare="sim_size" />
      <output name="go_en_out" file="res/go_en.tsv" compare="diff" />
      <output name="gene_net_pdf" file="res/gene_net.pdf" compare="sim_size" />
      <output name="imbe_out" file="res/impact_betweenness.tsv" compare="diff" />
    </test>
  </tests>

  <!-- =============================================================== -->
  <!-- Help text is reStructuredText: line breaks, blank lines and indentation
       are significant, so the content is kept at column 0 and must not be
       re-wrapped or re-indented by an XML formatter. -->
  <help><![CDATA[
IonFlow Pipeline
=================

Description
-----------

This galaxy tool wraps R package IonFlow_ with modification to process
ionomics data to aid reproducible data sharing and big data initiatives.

The pipeline includes:

Pre-Processing
  This procedure performs batch correction with or without control lines,
  outlier detection and data standardisation. The processed concentration
  data and a symbolisation profile data are returned for further analysis.

Exploratory Analysis
  This procedure performs correlation analysis and PCA analysis in terms of
  ions. The heatmap with hierarchical clustering and network graph are based
  on the correlation analysis.

Clustering Analysis
  This step performs hierarchical clustering based on symbolised profile. The
  selected cluster centres (controlled by the threshold of minimal cluster
  centre number) are applied to enrichment and network analysis.

Enrichment Analysis
  This step uses the clustering results for enrichment analysis. Currently
  supports three annotation packages:

  - Yeast(org.Sc.sgd.db)
  - Human(org.Hs.eg.db)
  - Mouse(org.Mm.eg.db)

Network Analysis
  This part uses hierarchical clustering of the symbolised profile for network
  analysis. Genes with correlation coefficient larger than a threshold are
  then used. Some network analysis stats such as impact and betweenness are
  also returned.

.. _IonFlow: https://github.com/AlinaPeluso/MetaboFlow

Inputs
------

Ionomics data
~~~~~~~~~~~~~

The input file is an ionomics data table in tabular (CSV) format. The
following is an example with the first two columns of knockout name and batch
ID and other columns are ion data. To use this input data in
``Pre-processing`` , the user must indicate ``var_id`` as ``1``
(``Knockout``), ``batch_id`` as ``2`` (``Batch_ID``) and ``data_id`` as ``3``.
If the file has the batch control information in the first column,
``control_lines`` should indicate it. For example, if ``YDL227C`` will be used
as batch control, ``control_lines = "YDL227C"``.

+----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
| Knockout | Batch_ID | Ca      | Cd    | Cu    | Fe     | K        | Mg      | Mo    | Na      | Ni    | P        | S        |
+----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
| YDL227C  | 14       | 59.549  | 0.953 | 2.202 | 10.942 | 3448.070 | 693.992 | 1.603 | 259.816 | 1.573 | 4963.315 | 556.397  |
+----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
| YDL227C  | 14       | 62.258  | 0.927 | 2.067 | 26.262 | 3493.741 | 705.008 | 2.691 | 273.640 | 4.443 | 4874.101 | 553.229  |
+----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
| YDL227C  | 14       | 65.075  | 0.875 | 2.048 | 10.244 | 3317.611 | 691.411 | 1.878 | 278.167 | 1.448 | 4608.300 | 535.609  |
+----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
| YDL227C  | 14       | 56.886  | 0.985 | 2.203 | 9.206  | 3330.854 | 702.734 | 1.396 | 268.609 | 1.640 | 5119.736 | 546.230  |
+----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+

|

Custom STD data
~~~~~~~~~~~~~~~

Standard deviation values can be provided by the user if standardisation
method ``stand_method`` in pre-processing procedure is selected as ``custom``.
The user defined std file has tabular format such as:

+------+----------+
| Ion  | sd       |
+------+----------+
| Ca   | 0.150    |
+------+----------+
| Fe   | 0.163    |
+------+----------+
| K    | 0.094    |
+------+----------+
| Mg   | 0.059    |
+------+----------+
| Na   | 0.107    |
+------+----------+
| Ni   | 0.078    |
+------+----------+
| Zn   | 0.067    |
+------+----------+

|

Outputs
-------

Pre-processing
~~~~~~~~~~~~~~

The output includes:

- A PDF file for the plots: dot-plot with ion vs batch and distribution plot
  of ions.
- A tabular file for statistical summary of data set
- A tabular file for outlier table
- A tabular file for processed data set
- A tabular file for the symbolisation data set

Exploratory analysis
~~~~~~~~~~~~~~~~~~~~

A single PDF file with plots:

- Correlation map
- Heatmap with dendrogram
- PCA plot
- Correlation network graph

Clustering analysis
~~~~~~~~~~~~~~~~~~~

A single PDF file with clustering plots.

Enrichment analysis
~~~~~~~~~~~~~~~~~~~

A tabular file for GO Terms enrichment table.

Network analysis
~~~~~~~~~~~~~~~~

Two files are returned:

- A PDF file for plots

  - network graph
  - impact scatter plot

- A tabular file for impact and betweenness table
  ]]></help>

  <!-- No citations are registered for this tool yet. -->
  <citations>
  </citations>
</tool>