view COBRAxy/marea.xml @ 225:ba7043091fe3 draft

Uploaded
author luca_milaz
date Mon, 16 Dec 2024 17:21:36 +0000
parents 7e703e546998
children 84b31c9100ad
line wrap: on
line source

<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="2.0.0">
	<!-- Shared macro definitions; this file uses the "citations" macro and the @REFERENCE@ token, which are not defined here. -->
	<macros>
		<import>marea_macros.xml</import>
	</macros>
	
	<!-- Pinned packages that must be available in the environment running marea.py. -->
	<requirements>
		<requirement version="1.24.4" type="package">numpy</requirement>
		<requirement version="2.0.3" type="package">pandas</requirement>
		<requirement version="5.2.2" type="package">lxml</requirement>
		<requirement version="1.10.1" type="package">scipy</requirement>
		<requirement version="1.5.1" type="package">svglib</requirement>
		<requirement version="2.2.3" type="package">pyvips</requirement>
		<requirement version="2.7.1" type="package">cairosvg</requirement>
		<requirement version="0.29.0" type="package">cobra</requirement>
	</requirements>
	
	<command detect_errors="exit_code">
		<![CDATA[
		## Cheetah template that builds the marea.py command line.
		## Paths and free-text values are single-quoted so that values containing
		## spaces reach the script as a single argument.
		python '$__tool_directory__/marea.py'

		--tool_dir '$__tool_directory__'
		--option $cond.type_selector
		--out_log '$log'

		#if $cond.type_selector == 'datasets':
			## One dataset per group: datasets and their names are passed as two parallel lists.

			--using_RAS $cond.using_ras.check
			--using_RPS $cond.using_rps.check

			#if $cond.using_ras.check == 'true':
				--input_datas
				#for $data in $cond.using_ras.input_datasets:
					'${data.input}'
				#end for

				--names
				#for $data in $cond.using_ras.input_datasets:
					'${data.input_name}'
				#end for
			#end if

			#if $cond.using_rps.check == 'true':
				--input_datas_rps
				#for $data in $cond.using_rps.input_datasets_rps:
					'${data.input_rps}'
				#end for

				--names_rps
				#for $data in $cond.using_rps.input_datasets_rps:
					'${data.input_name_rps}'
				#end for
			#end if

		#elif $cond.type_selector == 'dataset_class':
			## One dataset holding all samples plus a file assigning each sample to a group.

			--using_RAS $cond.using_ras_all.check
			--using_RPS $cond.using_rps_all.check

			#if $cond.using_ras_all.check == 'true':
				--input_data '${cond.using_ras_all.input_data}'
				--input_class '${cond.using_ras_all.input_class}'
			#end if

			#if $cond.using_rps_all.check == 'true':
				--input_data_rps '${cond.using_rps_all.input_data_rps}'
				--input_class_rps '${cond.using_rps_all.input_class_rps}'
			#end if
		#end if

		--comparison ${comparis.comparison}
		#if $comparis.comparison == 'onevsmany':
			## "comparis" is a top-level conditional, so controlgroup is read from it
			## directly and not through "cond".
			--control '${comparis.controlgroup}'
		#end if

		--choice_map '${cond_choice_map.choice_map}'
		#if $cond_choice_map.choice_map == 'Custom':
			--custom_map '${cond_choice_map.custom_map}'
		#end if

		#if $advanced.choice == 'true':
			--pValue ${advanced.pValue}
			--fChange ${advanced.fChange}
			--generate_svg ${advanced.generateSvg}
			--generate_pdf ${advanced.generatePdf}
			--net ${advanced.netRPS}
		#else
			## Defaults used when the advanced section is disabled; they mirror the
			## default values declared on the advanced parameters.
			--pValue 0.05
			--fChange 1.2
			--generate_svg false
			--generate_pdf true
			--net false
		#end if
		]]>
	</command>
	
	<inputs>
		<!-- Input layout: either one dataset per group, or a single dataset with all samples plus a group file. -->
		<conditional name="cond">
			<param name="type_selector" argument="--option" type="select" label="Input format:">
				<option value="datasets" selected="true">RAS of group 1 + RAS of group 2 + ... + RAS of group N</option>
				<option value="dataset_class">RAS of all samples + sample group specification</option>
			</param>

			<when value="datasets">
				<conditional name="using_ras">
					<param name="check" argument="--using_RAS" type="boolean" checked="true" label="Using RAS datasets." />

					<when value="true">
						<repeat name="input_datasets" title="RAS dataset" min="2">
							<param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />
							<param name="input_name" argument="--names" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" />
						</repeat>
					</when>
					<!-- Empty branch so that every value of the test parameter has a declared case. -->
					<when value="false" />
				</conditional>

				<conditional name="using_rps">
					<param name="check" argument="--using_RPS" type="boolean" checked="false" label="Using RPS datasets." />

					<when value="true">
						<repeat name="input_datasets_rps" title="RPS dataset" min="2">
							<param name="input_rps" argument="--input_datas_rps" type="data" format="tabular, csv, tsv" label="add dataset" />
							<param name="input_name_rps" argument="--names_rps" type="text" label="Dataset's name:" value="Dataset" help="Default: Dataset" />
						</repeat>
					</when>
					<when value="false" />
				</conditional>
			</when>

			<when value="dataset_class">
				<conditional name="using_ras_all">
					<param name="check" argument="--using_RAS" type="boolean" checked="true" label="Using RAS datasets." />

					<when value="true">
						<param name="input_data" argument="--input_data" type="data" format="tabular, csv, tsv" label="RAS of all samples" />
						<param name="input_class" argument="--input_class" type="data" format="tabular, csv, tsv" label="Sample group specification" />
					</when>
					<when value="false" />
				</conditional>

				<conditional name="using_rps_all">
					<param name="check" argument="--using_RPS" type="boolean" checked="false" label="Using RPS datasets." />

					<when value="true">
						<param name="input_data_rps" argument="--input_data_rps" type="data" format="tabular, csv, tsv" label="RPS of all samples" />
						<param name="input_class_rps" argument="--input_class_rps" type="data" format="tabular, csv, tsv" label="Sample group specification" />
					</when>
					<when value="false" />
				</conditional>
			</when>
		</conditional>

		<!-- Comparison strategy; only the control comparison needs an extra parameter. -->
		<conditional name="comparis">
			<param name="comparison" argument="--comparison" type="select" label="Groups comparison:">
				<option value="manyvsmany" selected="true">One vs One</option>
				<option value="onevsrest">One vs All</option>
				<option value="onevsmany">One vs Control</option>
			</param>
			<when value="manyvsmany" />
			<when value="onevsrest" />
			<when value="onevsmany">
				<param name="controlgroup" argument="--control" type="text" label="Control group label:" value="0" help="Name of group label to be compared to others"/>
			</when>
		</conditional>

		<!-- Metabolic map to enrich; a map file is requested only for the Custom choice. -->
		<conditional name="cond_choice_map">
			<param name="choice_map" argument="--choice_map" type="select" label="Choose metabolic map:">
				<option value="HMRcore" selected="true">HMRcore</option>
				<option value="ENGRO2">ENGRO2</option>
				<option value="Custom">Custom</option>
			</param>

			<when value="HMRcore" />
			<when value="ENGRO2" />
			<when value="Custom">
				<param name="custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>
			</when>
		</conditional>

		<!-- Advanced section. The test parameter is a plain boolean: boolean parameters take no option children,
		     and the ones previously present labelled the true value as "No". -->
		<conditional name="advanced">
			<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom parameters for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps." />

			<when value="true">
				<param name="pValue" argument="--pValue" type="float" size="20" value="0.05" max="1" min="0" label="P-value threshold:" help="min value 0" />
				<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />
				<param name="generateSvg" argument="--generate_svg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />
				<param name="generatePdf" argument="--generate_pdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />

				<param name="netRPS" argument="--net" type="boolean" checked="false" label="Should RPS enrichment use net values?" help="If checked and RPS datasets are present the arrow tips of a reversible arrow will be colored with the net contribution of both directions' RPS values" />
			</when>
			<!-- When disabled, the command template supplies fixed default values instead. -->
			<when value="false" />
		</conditional>
	</inputs>

	<outputs>
		<!-- Text log; its path is handed to the script through the out_log option of the command. -->
		<data name="log" format="txt" label="MaREA - Log" />
		<!-- Every file found in the "result" directory becomes one element of this list collection. -->
		<collection name="results" type="list" label="MaREA - Results">
			<discover_datasets directory="result" pattern="__name_and_ext__" />
		</collection>
	</outputs>
	
	<help>
	<![CDATA[

What it does
-------------

This tool analyzes and visualizes differences in the Reaction Activity Scores (RASs) of groups of samples, as computed by the Expression2RAS tool.

Accepted files are: 
    - option 1) two or more RAS datasets, each referring to samples in a given group. The user can specify a label for each group (as e.g. "classA" and "classB");
    - option 2) one RAS dataset and one group-file specifying the group each sample belongs to.
    
RAS datasets format: tab-separated text files, reporting the RAS value of each reaction (row) for a given sample (column).

Column header: sample ID.
Row header: reaction ID. 

Optional files:
    - custom svg map. Graphical elements must have the same IDs as the corresponding reactions. See the HMRcore svg map for an example.

The tool generates:
    - 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;
    - 2) a metabolic map file (downloadable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;
    - 3) a log file (.txt).
    
Output options:
To calculate P-Values and Fold-Changes and to enrich maps, comparisons are performed for each possible pair of groups (default option ‘One vs One’).

Alternative options are:
    - comparison of each group vs. the rest of the samples (option ‘One vs All’);
    - comparison of each group vs. a control group (option ‘One vs Control’). If this option is selected, the user must indicate the control group label.

Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".

Example input
-------------

"RAS of group 1 + RAS of group 2 + ... + RAS of group N" option:

RAS Dataset 1:

+------------+----------------+----------------+----------------+ 
| Reaction ID|   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
+============+================+================+================+
| r1642      |    0.523167    |    0.371355    |    0.925661    |  
+------------+----------------+----------------+----------------+    
| r1643      |    0.568765    |    0.765567    |    0.456789    |    
+------------+----------------+----------------+----------------+    
| r1640      |    0.876545    |    0.768933    |    0.987654    |  
+------------+----------------+----------------+----------------+
| r1641      |    0.456788    |    0.876543    |    0.876542    |    
+------------+----------------+----------------+----------------+    
| r1646      |    0.876543    |    0.786543    |    0.897654    |   
+------------+----------------+----------------+----------------+

RAS Dataset 2:

+------------+----------------+----------------+----------------+ 
| Reaction ID|   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
+============+================+================+================+
| r1642      |    0.523167    |    0.371355    |    0.925661    |  
+------------+----------------+----------------+----------------+    
| r1643      |    0.568765    |    0.765567    |    0.456789    |    
+------------+----------------+----------------+----------------+    
| r1640      |    0.876545    |    0.768933    |    0.987654    |  
+------------+----------------+----------------+----------------+
| r1641      |    0.456788    |    0.876543    |    0.876542    |    
+------------+----------------+----------------+----------------+    
| r1646      |    0.876543    |    0.786543    |    0.897654    |   
+------------+----------------+----------------+----------------+

"RAS of all samples + sample group specification" option:

RAS Dataset:

+------------+----------------+----------------+----------------+ 
| Reaction ID|   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
+============+================+================+================+
| r1642      |    0.523167    |    0.371355    |    0.925661    |  
+------------+----------------+----------------+----------------+    
| r1643      |    0.568765    |    0.765567    |    0.456789    |    
+------------+----------------+----------------+----------------+    
| r1640      |    0.876545    |    0.768933    |    0.987654    |  
+------------+----------------+----------------+----------------+
| r1641      |    0.456788    |    0.876543    |    0.876542    |    
+------------+----------------+----------------+----------------+    
| r1646      |    0.876543    |    0.786543    |    0.897654    |   
+------------+----------------+----------------+----------------+

Group-file

+---------------+-----------+
| Patient ID    |   Class   | 
+===============+===========+
| TCGAAA3529    |    MSI    | 
+---------------+-----------+  
| TCGAA62671    |    MSS    |    
+---------------+-----------+   
| TCGAA62672    |    MSI    |
+---------------+-----------+

Advanced options
----------------

P-Value threshold: the significance threshold used for the Kolmogorov-Smirnov (KS) test, which verifies whether the distributions of RASs over the samples in two sets are significantly different.

Fold-Change threshold: the threshold on the fold-change between the average RAS of two groups. Among the reactions that pass the KS test, only those with a fold-change larger than the indicated threshold will be visualized on the output metabolic map.


.. class:: infomark

**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.

.. class:: infomark

**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.

.. class:: infomark

**TIP**: This tool uses the RAS scores computed by the RAS generator tool (see `Ras tool`_).

@REFERENCE@

.. _Ras tool: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA+RAS+Generator%2F1.0.6&version=1.0.6&__identifer=auulv6gbp76
.. _Convert delimiters to TAB: http://bimib.disco.unimib.it:5555/?tool_id=Convert+characters1&version=1.0.0&__identifer=76g7trea4j6
.. _MaREA cluster analysis: http://bimib.disco.unimib.it:5555/?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fbimib%2Fmarea%2FMaREA_cluester%2F1.1.2&version=1.1.2&__identifer=lxbyzn2me9

]]>
	</help>
	<!-- Expands the "citations" macro; it is not defined in this file (marea_macros.xml is the only macro import). -->
	<expand macro="citations" />
</tool>