Mercurial > repos > bebatut > compare_humann2_output

<tool id="compare_humann2_output" name="Compare outputs of HUMAnN2 for several samples" version="0.2.0">
    <!-- Short description of the tool, followed by its cross-reference
         to the corresponding bio.tools registry entry. -->
    <description>and extract similar and specific information</description>
    <xrefs>
        <xref type="bio.tools">compare_humann2_outputs</xref>
    </xrefs>
    <!--
        Command template.
        1. Creates the `specifics` sub-directory of the job working directory.
        2. Runs compare_humann2_output.py once, passing one sample name, one
           input file and one per-sample output path for every entry of the
           `samples` repeat, plus the number of most abundant characteristics
           and the two tabular output paths.
        3. Redirects the script's standard output into the `log` dataset.
        Every interpolated path is single-quoted so that a path containing
        spaces or shell metacharacters is still passed as a single word.
        NOTE(review): sample_name is interpolated into the per-sample file name
        under specifics/; a name containing a path separator would presumably
        point into a directory that does not exist. Consider adding a validator
        on sample_name (to be confirmed against the allowed characters).
    -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir specifics
        &&

        python '$__tool_directory__/compare_humann2_output.py'
            #for $sample in $samples
                --sample_name '$sample.sample_name'
                --charact_input_fp '$sample.charact_input_fp'
                --specific_output_fp 'specifics/specific_to_${sample.sample_name}.txt'
            #end for
            --most_abundant_characteristics_to_extract $most_abundant_characteristics_to_extract
            --more_abundant_output_fp '$more_abundant_output_file'
            --similar_output_fp '$similar_output_file'
            > '$log'
    ]]></command>
    <!-- Tool inputs: a repeatable (sample name, HUMAnN2 abundance file) pair
         and the number of most abundant characteristics to report per sample. -->
    <inputs>
        <repeat name="samples" title="Add sample and input file (HUMAnN2 output after normalization)">
            <!-- Name of the sample; also used by the command to build the
                 per-sample "specific" output file name. -->
            <param argument="sample_name" type="text" label="Name of the sample"/>
            <param argument="charact_input_fp" format="txt,tabular" type="data"
                label="Input file corresponding to HUMAnN2 output"
                help="The HUMAnN2 output file contains relative abundance of gene families or pathways (after normalization)"/>
        </repeat>
        <param argument="most_abundant_characteristics_to_extract" type="integer" value="10"
            label="Number of most abundant characteristics to extract for each sample"/>
    </inputs>
    <!-- Tool outputs: two tabular summaries, the captured log, and one
         discovered file per sample gathered into a list collection. -->
    <outputs>
        <!-- Written by the script via its more_abundant_output_fp option. -->
        <data name="more_abundant_output_file"
              format="tabular"
              label="${tool.name} on ${on_string}: More abundant characteristics for each sample"/>
        <!-- Written by the script via its similar_output_fp option. -->
        <data name="similar_output_file"
              format="tabular"
              label="${tool.name} on ${on_string}: Similar characteristics and the relative abundances for all samples"/>
        <!-- Receives the script's standard output; marked hidden. -->
        <data name="log"
              format="txt"
              hidden="true"
              label="${tool.name} on ${on_string}: Log"/>
        <!-- Per-sample files that the command writes into specifics/. -->
        <collection name="specific_files" type="list">
            <discover_datasets directory="specifics" pattern="__designation_and_ext__"/>
        </collection>
    </outputs>
    <!-- Functional test: two pathway-abundance samples compared with the
         default number (10) of most abundant characteristics. -->
    <tests>
        <test>
            <!-- Inputs -->
            <param name="most_abundant_characteristics_to_extract" value="10"/>
            <repeat name="samples">
                <param name="sample_name" value="sample1"/>
                <param name="charact_input_fp" value="sample_1_pathway_abundances.tabular"/>
            </repeat>
            <repeat name="samples">
                <param name="sample_name" value="sample2"/>
                <param name="charact_input_fp" value="sample_2_pathway_abundances.tabular"/>
            </repeat>

            <!-- Most abundant characteristics table -->
            <output name="more_abundant_output_file">
                <assert_contents>
                    <has_n_columns n="4"/>
                    <has_n_lines n="16"/>
                    <has_text text="aerobic respiration I |g__Rhodobacter.s__Rhodobacter_sphaeroides"/>
                    <has_text text="phytol degradation|g__Staphylococcus.s__Staphylococcus_aureus"/>
                </assert_contents>
            </output>

            <!-- Characteristics shared by all samples -->
            <output name="similar_output_file">
                <assert_contents>
                    <has_n_columns n="4"/>
                    <has_n_lines n="1921"/>
                    <has_text text="superpathway of sulfur amino acid biosynthesis |g__Staphylococcus.s__Staphylococcus_epidermidis"/>
                    <has_text text="tetrapyrrole biosynthesis I |g__Pseudomonas.s__Pseudomonas_aeruginosa"/>
                </assert_contents>
            </output>

            <!-- Captured standard output of the script -->
            <output name="log">
                <assert_contents>
                    <has_n_lines n="12"/>
                    <has_line line="Similar between all samples: 1920"/>
                    <has_line line="    Number of specific characteristics: 392"/>
                    <has_line line="    Number of specific characteristics: 1335"/>
                </assert_contents>
            </output>

            <!-- One discovered file per sample -->
            <output_collection name="specific_files" type="list">
                <element name="specific_to_sample1">
                    <assert_contents>
                        <has_n_columns n="3"/>
                        <has_n_lines n="393"/>
                        <has_text text="GLUCOSE1PMETAB-PWY"/>
                        <has_text text="inosine 5 phosphate biosynthesis III|g__Staphylococcus.s__Staphylococcus_aureus"/>
                    </assert_contents>
                </element>
                <element name="specific_to_sample2">
                    <assert_contents>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1336"/>
                        <has_text text="PEPTIDOGLYCANSYN-PWY"/>
                        <has_text text="nitrate reduction V |g__Deinococcus.s__Deinococcus_radiodurans"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool compares HUMAnN2 outputs with gene families or pathways and their relative abundances between several samples. Several files are extracted:

  * Similar gene families or pathways between the samples and the relative abundances of these similar characteristics

  * Most abundant gene families or pathways for each sample and the corresponding relative abundance in all samples

  * Specific gene families and pathways for each sample and the relative abundances of these specific characteristics

    ]]></help>
</tool>
author	bgruening
date	Sat, 04 Nov 2023 18:59:54 +0000
parents	05766022dfc4
children