view tools/networkAnalysis/SideCompoundsScan/SideCompoundsScan.xml @ 5:35c9abcd8934 draft

planemo upload for repository https://forgemia.inra.fr/metexplore/met4j-galaxy commit 8577c4cd3ad279c5e97f48f822e041c6b0d90598
author metexplore
date Thu, 12 Jan 2023 13:45:13 +0000
parents ae4c301919c4
children 7a6f2380fc1d
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8"?>
<tool id="met4j_SideCompoundsScan" name="SideCompoundsScan" version="1.2.1">
  <!-- One-line summary of the tool. -->
  <description>Scan a network to identify side-compounds.</description>
  <!-- Cross-reference to the met4j entry in the bio.tools registry. -->
  <xrefs>
    <xref type="bio.tools">met4j</xref>
  </xrefs>
  <!-- Singularity image tagged 1.2.1, the same version number as the tool's version attribute. -->
  <requirements>
    <container type="singularity">oras://registry.forgemia.inra.fr/metexplore/met4j/met4j-singularity:1.2.1</container>
  </requirements>
  <command detect_errors="exit_code"><![CDATA[sh /usr/bin/met4j.sh networkAnalysis.SideCompoundsScan -i '$inputPath'
 $sideOnly
 $noReportValue
## Values are single-quoted so the shell performs no expansion on them.
#if str($degree):
 -d '$degree'
#end if
## Skip -dp for the NaN placeholder and also when the field was cleared
## (an unset optional value is expected to render as '' or 'None').
#if str($degreePrecentile).lower() not in ('', 'nan', 'none'):
 -dp '$degreePrecentile'
#end if
 $flagInorganic
 $flagNoFormula
## Same guard as -dp: never pass an empty or placeholder threshold.
#if str($parallelEdge).lower() not in ('', 'nan', 'none'):
 -nc '$parallelEdge'
#end if
## An optional select with nothing chosen renders as the string 'None',
## which is truthy, so it has to be tested for explicitly.
#if str($mergingStrat) not in ('', 'None'):
 -m '$mergingStrat'
#end if
 -o '$outputPath'
]]></command>
  <inputs>
    <!-- Network to scan, in SBML format (required). -->
    <param argument="-i" format="sbml" label="input SBML file" name="inputPath" optional="false" type="data" value=""/>
    <!-- Boolean switches: each one contributes its flag to the command line when checked, nothing otherwise. -->
    <param argument="-s" checked="false" falsevalue="" label="output compounds flagged as side-Compounds only" name="sideOnly" truevalue="-s" type="boolean" value="false"/>
    <param argument="-id" checked="false" falsevalue="" label="do not report values in output, export ids list of compounds flagged as side-Compounds, allowing piping results" name="noReportValue" truevalue="-id" type="boolean" value="false"/>
    <!-- Degree threshold. The value is interpolated into a shell command, so only digits are
         accepted; any other character is replaced by "_" instead of reaching the shell. -->
    <param argument="-d" label="flag as side compounds any compounds with degree above threshold" name="degree" optional="true" type="text" value="400">
      <sanitizer invalid_char="_">
        <valid initial="string.digits"/>
      </sanitizer>
    </param>
    <!-- NaN is the "not set" placeholder; the command template omits the flag in that case. -->
    <param argument="-dp" label="flag as side compounds the top x% of compounds according to their degree" name="degreePrecentile" optional="true" type="float" value="NaN"/>
    <param argument="-cc" checked="false" falsevalue="" label="flag as side compound any compounds with less than 2 carbons in formula" name="flagInorganic" truevalue="-cc" type="boolean" value="false"/>
    <param argument="-uf" checked="false" falsevalue="" label="flag as side compound any compounds with no valid chemical formula" name="flagNoFormula" truevalue="-uf" type="boolean" value="false"/>
    <!-- NaN is the "not set" placeholder here as well. -->
    <param argument="-nc" label="flag as side compound any compound with a number of parallel edges shared with a neighbor above the given threshold" name="parallelEdge" optional="true" type="float" value="NaN"/>
    <!-- Strategy for merging the same compound across compartments; "no" is preselected. -->
    <param argument="-m" label="Degree is shared between compounds in different compartments. Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form &quot;xxx_y&quot; with xxx as base identifier and y as compartment label)." name="mergingStrat" optional="true" type="select" value="no">
      <option selected="true" value="no">no</option>
      <option value="by_name">by_name</option>
      <option value="by_id">by_id</option>
    </param>
  </inputs>
  <outputs>
    <!-- Single result dataset; its path is handed to the tool through -o in the command. -->
    <data format="tsv" name="outputPath"/>
  </outputs>
  <tests>
    <!-- Default settings: the output is checked for 1108 lines and 4 columns. -->
    <test>
      <param name="inputPath" value="XF_network.sbml"/>
      <output name="outputPath" ftype="tsv">
        <assert_contents>
          <has_n_lines n="1108"/>
          <has_n_columns n="4"/>
        </assert_contents>
      </output>
    </test>
    <!-- With noReportValue enabled (-id): the output is checked for 6 lines and a single column. -->
    <test>
      <param name="inputPath" value="XF_network.sbml"/>
      <param name="noReportValue" value="true"/>
      <output name="outputPath" ftype="tsv">
        <assert_contents>
          <has_n_lines n="6"/>
          <has_n_columns n="1"/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[Scan a network to identify side-compounds.
Side compounds are metabolites of small relevance for topological analysis. Their definition can be quite subjective and varies between sources.
Side compounds tend to be ubiquitous and not specific to a particular biochemical or physiological process. Compounds usually considered as side compounds include water, ATP or carbon dioxide. By being involved in many reactions and thus connected to many compounds, they tend to significantly lower the average shortest path distances beyond expected metabolic relatedness.
This tool attempts to propose a list of side compounds according to specific criteria:  
- *Degree*: Compounds with an uncommonly high number of neighbors can betray a lack of process specificity.  
High degree compounds typically include water and most main cofactors (CoA, ATP, NADPH...) but can also include central compounds such as pyruvate or acetyl-CoA  
- *Neighbor Coupling*: Similar to degree, this criterion assumes that side compounds are involved in many reactions, but in pairs with other side compounds.
Therefore, the transition from ATP to ADP will appear multiple times in the network, creating redundant 'parallel edges' between these two neighbors.
Being tightly coupled to another compound through a high number of redundant edges can point out cofactors while keeping converging pathways' products with high degree like pyruvate aside.  
- *Carbon Count*: Metabolic "waste", or degradation end-products such as ammonia or carbon dioxide, are usually considered as side compounds.
Most of them are inorganic compounds, another ill-defined concept, sometimes defined as compounds lacking C-C or C-H bonds. Since chemical structure is rarely available in SBML models beyond chemical formula, we use a less restrictive criterion by flagging compounds with one or no carbons. This covers most inorganic compounds, but includes a few compounds such as methane that are usually considered organic.  
- *Chemical Formula*: Metabolic networks often contain 'artifacts' that serve modelling purposes (to define a composite objective function for example). Such entities can be considered as 'side entities'. Since they are not actual chemical compounds, they can be detected by their lack of valid chemical formula. However, this can also flag main compounds with erroneous or missing annotation.]]></help>
  <citations/>
</tool>