Mercurial > repos > bimib > marea
diff Marea/marea_cluster.xml @ 16:c71ac0bb12de draft
Uploaded
author | bimib |
---|---|
date | Tue, 01 Oct 2019 06:05:13 -0400 |
parents | 68a5f2db55b9 |
children | 640f303d0cec |
line wrap: on
line diff
--- a/Marea/marea_cluster.xml Tue Oct 01 06:03:12 2019 -0400
+++ b/Marea/marea_cluster.xml Tue Oct 01 06:05:13 2019 -0400
@@ -1,148 +1,97 @@
-<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.0">
-    <description>of Reaction Activity Scores</description>
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="0.23.0">pandas</requirement>
-        <requirement type="package" version="1.1.0">scipy</requirement>
-        <requirement type="package" version="0.10.1">cobra</requirement>
-        <requirement type="package" version="0.19.1">scikit-learn</requirement>
-        <requirement type="package" version="2.2.2">matplotlib</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
-        python $__tool_directory__/marea_cluster.py
-        --rules_selector $cond_rule.rules_selector
-        #if $cond_rule.rules_selector == 'Custom':
-            --custom ${cond_rule.Custom_rules}
-        #end if
-        --cond_hier $cond_hier.hier
-        #if $cond_hier.hier == 'yes':
-            --linkage ${cond_hier.linkage}
-            --dendro $dendrogram
-        #end if
-        --k_max $k_max
-        --k_min $k_min
-        --data $input
-        --name $name
-        --none $None
-        --tool_dir $__tool_directory__
-        --out_log $log
-        --elbow $elbow
-        ]]>
-    </command>
-    <inputs>
-        <conditional name="cond_rule">
-            <expand macro="options"/>
-            <when value="Custom">
-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />
-            </when>
-            <when value="HMRcore">
-            </when>
-            <when value="Recon">
-            </when>
-        </conditional>
-        <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
-        <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" />
-        <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/>
-        <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/>
-        <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" />
-        <conditional name="cond_hier">
-            <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):">
-                <option value="no" selected="true">no</option>
-                <option value="yes">yes</option>
-            </param>
-            <when value="yes">
-                <param name="linkage" argument="--linkage" type="select" label="Linkage type:">
-                    <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
-                    <option value="complete">Complete: maximum distance between all observations of two sets</option>
-                    <option value="average">Average: average distance between all observations of two sets</option>
-                </param>
-            </when>
-            <when value="no">
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="log" label="Log" />
-        <data format="pdf" name="dendrogram" label="$name dendrogram">
-            <filter>cond_hier['hier'] == 'yes'</filter>
-        </data>
-        <data format="pdf" name="elbow" label="$name elbow evaluation method" />
-        <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
-            <discover_datasets pattern="__name_and_ext__" directory="cluster_out" />
-        </collection>
-    </outputs>
-    <tests>
-        <test>
-            <param name="k_min" value="4"/>
-            <output name="log" file="log.txt"/>
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-
-What it does
--------------
-
-This tool performs cluster analysis of RNA-seq dataset(s) based of Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
-
-Accepted files are:
-    1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*");
-    2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*").
-
-Optional files:
-    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
-
-        * (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
-        * .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).
-    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.
-
-The tool generates:
-    1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to;
-    2) Log: a log file (.txt);
-    3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method;
-    4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierachical clustering).
-
-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
-
-
-Example input
--------------
-
-**RNA-seq dataset**:
-
-@DATASET_EXEMPLE1@
-
-**Custom Rules Dataset**:
-
-@CUSTOM_RULES_EXEMPLE@
-
-**Custom Map**:
-
-*see the generated HMRcore .svg map for example*
-
-
-
-.. class:: infomark
-
-**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
-
-.. class:: warningmark
-
-If dendrogram it's too populated, each path and label can be not clear.
-
-@REFERENCE@
-
-.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
-.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
-
-
+<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.1">
+    <description>of Reaction Activity Scores - 1.0.1</description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.23.0">pandas</requirement>
+        <requirement type="package" version="1.1.0">scipy</requirement>
+        <requirement type="package" version="0.10.1">cobra</requirement>
+        <requirement type="package" version="0.21.3">scikit-learn</requirement>
+        <requirement type="package" version="2.2.2">matplotlib</requirement>
+        <requirement type="package" version="1.17">numpy</requirement>
+    </requirements>
+    <!-- The selected clustering type is always passed; the remaining options are emitted only for the clustering type they belong to. -->
+    <command detect_errors="exit_code">
+        <![CDATA[
+        python $__tool_directory__/marea_cluster.py
+        --input $input
+        --tool_dir $__tool_directory__
+        --out_log $log
+        --cluster_type ${data.clust_type}
+        #if $data.clust_type == 'kmeans':
+            --k_min ${data.k_min}
+            --k_max ${data.k_max}
+            --elbow ${data.elbow}
+            --silhouette ${data.silhouette}
+        #end if
+        #if $data.clust_type == 'dbscan':
+            #if $data.dbscan_advanced.advanced == 'true'
+                --eps ${data.dbscan_advanced.eps}
+                --min_samples ${data.dbscan_advanced.min_samples}
+            #end if
+        #end if
+        #if $data.clust_type == 'hierarchy':
+            --k_min ${data.k_min}
+            --k_max ${data.k_max}
+        #end if
+        ]]>
+    </command>
+    <inputs>
+        <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
+
+        <conditional name="data">
+            <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
+                <option value="kmeans" selected="true">KMeans</option>
+                <option value="dbscan">DBSCAN</option>
+                <option value="hierarchy">Agglomerative Hierarchical</option>
+            </param>
+            <when value="kmeans">
+                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
+                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+                <param name="elbow" argument="--elbow" type="boolean" checked="true" label="Draw the elbow plot from k-min to k-max"/>
+                <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
+            </when>
+            <when value="dbscan">
+                <!-- eps and min_samples reach the script only when the user opts in to custom DBSCAN parameters. -->
+                <conditional name="dbscan_advanced">
+                    <param name="advanced" type="select" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
+                        <option value="true">Yes</option>
+                        <option value="false" selected="true">No</option>
+                    </param>
+                    <when value="false"></when>
+                    <when value="true">
+                        <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
+                        <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
+
+                    </when>
+                </conditional>
+            </when>
+            <when value="hierarchy">
+                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
+                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="log" label="${tool.name} - Log" />
+        <!-- Result files are discovered in the "clustering" directory by name and extension. -->
+        <collection name="results" type="list" label="${tool.name} - Results">
+            <discover_datasets pattern="__name_and_ext__" directory="clustering"/>
+        </collection>
+    </outputs>
+    <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool performs cluster analysis of the input dataset with one of three methods: KMeans, DBSCAN or agglomerative hierarchical clustering. For KMeans and hierarchical clustering, the numbers of clusters (k) between the chosen minimum and maximum are tested.
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
+
+