view icescreen.xml @ 4:c5f7c311f1b0 draft

planemo upload for repository https://forgemia.inra.fr/ices_imes_analysis/icescreen commit a9bc15365abc70c0367d732b790763d8fa53b220
author iuc
date Tue, 28 Feb 2023 14:10:38 +0000
parents 2bb38197ff75
children 28e82853c217
line wrap: on
line source

<tool id="icescreen" name="ICEscreen" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>detects and annotates ICEs (Integrative and Conjugative Elements) and IMEs (Integrative and Mobilizable Elements) in Firmicutes genomes.</description>
    <!-- Version tokens: @TOOL_VERSION@ is reused as the icescreen package version in the requirements below, and @VERSION_SUFFIX@ is the number appended after "+galaxy" in the tool version attribute. -->
    <macros>
        <token name="@TOOL_VERSION@">1.1.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- Runtime packages: icescreen itself, plus zip, which the command section uses to build the optional archive of all results. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">icescreen</requirement>
        <requirement type="package" version="3.0">zip</requirement>
    </requirements>
    <!-- Command whose output is recorded as the version of the installed ICEscreen program. -->
    <version_command><![CDATA[
        icescreen --version
        ]]>
    </version_command>
    <!-- Runs ICEscreen on the input GenBank dataset inside ./tmp_icescreen, then copies the requested result files to the Galaxy output datasets. -->
    <command detect_errors="aggressive"><![CDATA[
        ## The input dataset is linked under a fixed base name; the result paths
        ## used below are all built from that same base name.
        #set $gbname = 'galaxy_genbank'
        mkdir -p ./tmp_icescreen/source_genbank
        && ln -s '${genome}' ./tmp_icescreen/source_genbank/${gbname}.gb

        ## All ICEscreen output is captured in a log file. If ICEscreen fails, the
        ## log is replayed on stderr so that the failed job shows why it failed
        ## instead of ending with empty stderr.
        && (icescreen
            --galaxy
            --gbdir ./tmp_icescreen/source_genbank
            --outdir ./tmp_icescreen
            --mode '${taxonomy.mode}'
            --jobs "\${GALAXY_SLOTS:-4}" > ./tmp_icescreen/ICEscreen_log.txt 2>&1
            || (cat ./tmp_icescreen/ICEscreen_log.txt >&2 && exit 1))

        ## Main outputs, always produced.
        && cat ./tmp_icescreen/ICEscreen_results/results/${gbname}/icescreen_detection_ME/${gbname}_detected_ME.summary > '${summary}'
        && cat ./tmp_icescreen/ICEscreen_results/results/${gbname}/icescreen_detection_ME/${gbname}_detected_SP_withMEIds.tsv > '${detected_sp}'
        && cat ./tmp_icescreen/ICEscreen_results/results/${gbname}/icescreen_detection_ME/${gbname}_detected_ME.tsv > '${detected_me}'

        ## Optional annotation files are copied, not moved: they live inside the
        ## folder that is zipped further down, and moving them would leave them
        ## out of the archive when both options are selected.
        #if "output_annotation_genbank" in $additional_output.optional_files:
            && cp ./tmp_icescreen/ICEscreen_results/results/${gbname}/visualization_files/${gbname}_icescreen.gb '${gbout}'
        #end if
        #if "output_annotation_gff" in $additional_output.optional_files:
            && cp ./tmp_icescreen/ICEscreen_results/results/${gbname}/visualization_files/${gbname}_icescreen.gff '${gffout}'
        #end if
        #if "output_annotation_embl" in $additional_output.optional_files:
            && cp ./tmp_icescreen/ICEscreen_results/results/${gbname}/visualization_files/${gbname}_icescreen.embl '${emblout}'
        #end if
        ## The log file sits outside the zipped folder, so it can simply be moved.
        #if "output_log" in $additional_output.optional_files:
            && mv ./tmp_icescreen/ICEscreen_log.txt '${log}'
        #end if
        ## Kept as the last step because it changes the working directory.
        #if "output_zip_all_files" in $additional_output.optional_files:
            && cd ./tmp_icescreen/ICEscreen_results/results/${gbname} && zip --quiet -r - -- . > '${outzip}'
        #end if
        ]]>
    </command>
    <!-- User-facing parameters: the GenBank input, the taxonomy mode handed to the icescreen mode option, and the checkboxes selecting optional outputs. -->
    <inputs>
        <param name="genome" type="data" format="genbank" label="Input genomes to analyze in Genbank format" help="Multi-genbank files (i.e. gbff) are supported. Each record must include the ORIGIN nucleotide sequence at the end."/>
        <section name="taxonomy" title="Taxonomy of the genomes to analyse" expanded="false">
            <param name="mode" type="select" label="Firmicutes is the default parameter" help="Streptomyces is EXPERIMENTAL and is NOT recommended.">
                <option value="firmicutes" selected="true">Firmicutes</option>
                <option value="streptomyces">Streptomyces</option>
            </param>
        </section>
        <section name="additional_output" title="Additional output files" expanded="false">
            <!-- The option values below are tested by name in the command section and in the output filters. -->
            <param name="optional_files" type="select" label="Make the following optional files available in the Galaxy history" multiple="true" optional="true" display="checkboxes">
                <option value="output_annotation_genbank">Genome annotation in genbank format</option>
                <option value="output_annotation_gff">Genome annotation in gff format</option>
                <option value="output_annotation_embl">Genome annotation in embl format</option>
                <option value="output_zip_all_files">Zip of all final and intermediate results</option>
                <option value="output_log">ICEscreen log</option>
            </param>
        </section>
    </inputs>
    <!-- The first three datasets are always created; each of the others is created only when the matching checkbox of the optional_files parameter is ticked. -->
    <outputs>
        <data name="detected_sp" format="tabular" label="${tool.name} on ${on_string}: Signature Proteins table"/>
        <data name="detected_me" format="tabular" label="${tool.name} on ${on_string}: ICEs/IMEs table"/>
        <data name="summary" format="txt" label="${tool.name} on ${on_string}: results summary"/>
        <data name="gbout" format="genbank" label="${tool.name} on ${on_string}: annotated genbank">
            <filter>additional_output['optional_files'] and "output_annotation_genbank" in additional_output['optional_files']</filter>
        </data>
        <data name="gffout" format="gff3" label="${tool.name} on ${on_string}: annotated GFF3">
            <filter>additional_output['optional_files'] and "output_annotation_gff" in additional_output['optional_files']</filter>
        </data>
        <data name="emblout" format="embl" label="${tool.name} on ${on_string}: annotated EMBL">
            <filter>additional_output['optional_files'] and "output_annotation_embl" in additional_output['optional_files']</filter>
        </data>
        <data name="outzip" format="zip" label="${tool.name} on ${on_string}: all results zipped">
            <filter>additional_output['optional_files'] and "output_zip_all_files" in additional_output['optional_files']</filter>
        </data>
        <data name="log" format="txt" label="${tool.name} on ${on_string}: log file">
            <filter>additional_output['optional_files'] and "output_log" in additional_output['optional_files']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default settings: only the three main outputs. The summary and the ICEs/IMEs table are compared with reference files; the Signature Proteins table is checked for key content and for its shape. -->
        <test expect_num_outputs="3">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <output name="summary" file="NC_004668_137848_164286_detected_ME.summary" ftype="txt" />
            <output name="detected_me" file="NC_004668_137848_164286_detected_ME.tsv" ftype="tabular" />
            <output name="detected_sp" >
                <assert_contents>
                    <has_text text="ICE_IME_id" />
                    <has_text text="WP_002359295" />
                    <has_text text="VirB4" />
                    <has_n_columns n="54" />
                    <has_n_lines n="5" />
                </assert_contents>
            </output>
        </test>
        <!-- Optional annotated GenBank output, compared with a reference file. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_annotation_genbank" />
            <output name="gbout" file="NC_004668_137848_164286_icescreen.gb" ftype="genbank" />
        </test>
        <!-- Optional annotated GFF3 output, compared with a reference file. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_annotation_gff" />
            <output name="gffout" file="NC_004668_137848_164286_icescreen.gff" ftype="gff3" />
        </test>
        <!-- Optional annotated EMBL output, compared with a reference file. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_annotation_embl" />
            <output name="emblout" file="NC_004668_137848_164286_icescreen.embl" ftype="embl" />
        </test>
        <!-- Optional zip archive: it must contain the Signature Proteins table with its header. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_zip_all_files" />
            <output name="outzip" >
                <assert_contents>
                    <has_archive_member path=".*/*_detected_SP_withMEIds.tsv"><has_text text="ICE_IME_id" /></has_archive_member>
                </assert_contents>
            </output>
        </test>
        <!-- Optional log output with the default mode: the log must mention the mode and expected progress messages, and must not contain "Error". -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_log" />
            <output name="log" >
                <assert_contents>
                    <has_text text="firmicutes" />
                    <has_text text="Building DAG of jobs" />
                    <has_text text="Complete log" />
                    <not_has_text text="Error" />
                </assert_contents>
            </output>
        </test>
        <!-- Streptomyces mode: the log must mention the selected mode and must not contain "Error". -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="mode" value="streptomyces" />
            <param name="optional_files" value="output_log" />
            <output name="log" >
                <assert_contents>
                    <has_text text="streptomyces" />
                    <not_has_text text="Error" />
                </assert_contents>
            </output>
        </test>
        <!-- Two optional outputs selected together (zip and log): five datasets are expected in total. -->
        <test expect_num_outputs="5">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="mode" value="firmicutes" />
            <param name="optional_files" value="output_zip_all_files,output_log" />
            <output name="log" >
                <assert_contents>
                    <has_text text="firmicutes" />
                    <not_has_text text="Error" />
                </assert_contents>
            </output>
        </test>
        <!-- Second input file; judging by its name it presumably holds two GenBank records (to be confirmed against the test data). -->
        <test expect_num_outputs="3">
            <param name="genome" value="genbank/NC_013798_298468_322494_NC_020450_643089_661957.gb" ftype="genbank" />
            <output name="summary" file="NC_013798_298468_322494_NC_020450_643089_661957_detected_ME.summary" ftype="txt" />
            <output name="detected_me" file="NC_013798_298468_322494_NC_020450_643089_661957_detected_ME.tsv" ftype="tabular" />
            <output name="detected_sp" >
                <assert_contents>
                    <has_text text="ICE_IME_id" />
                    <has_text text="WP_044555479.1" />
                    <has_text text="WP_015426013.1" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
        .. class:: warningmark

        ICEscreen requires input files in genbank format. Multigenbank files (i.e. gbff files featuring multiple genome records back to back) are supported. Each Genbank record must include the ORIGIN nucleotide sequence.

-----

        **What it does**

        ICEscreen is a bioinformatic pipeline for the detection and annotation of ICEs (Integrative and Conjugative Elements) and IMEs (Integrative and Mobilizable Elements) in Firmicutes genomes. The full documentation is available at https://icescreen.migale.inrae.fr. The forge of the project is accessible at https://forgemia.inra.fr/ices_imes_analysis/icescreen.

        **Main features of ICEscreen**

        - Detection of signature proteins (SPs) of ICEs/IMEs by using blastP on a curated resource. BlastP allows for an accurate assignment of hits to a given ICE/IME superfamily or family. The curated resource was derived from an analysis of over 120 ICEs and IMEs in Streptococcus genomes by the DINAMIC lab.
        - Detection of distant homologs of SPs by using HMM profiles of ICEs/IMEs protein families. The HMM profiles have been either imported from trusted resources or created and curated when needed.
        - Detection of the ICE/IME structures: ICEScreen groups together SPs that belong to the same ICE/IME structures to the best of its ability.
        - Delimitation of the elements at the gene or nucleotide level is not yet implemented and still needs manual curation.

        **Output files**

        There are 3 main output results files generated by ICEscreen:

        - Detected Signature Proteins table (`*_detected_SP_withMEIds.tsv`): list of the signature proteins detected by the tool and their possible assignment to an ICE or IME element. It is a tab separated table of 54 columns with a one line header. Each line represents a signature protein detected by ICEscreen.
        - Detected ICEs/IMEs table (`*_detected_ME.tsv`): list of the ICE and IME elements detected by the tool, including information about the signature proteins they contain. It is a tab separated table of 21 columns, the header is at line #3. Information in this file is similar to that of the Signature Proteins table (`*_detected_SP_withMEIds.tsv`) but centered around a list of ICE / IME structures instead of a list of SPs.
        - Results summary (`*_detected_ME.summary`): this file summarizes the main parameters and statistics regarding the ICE / IME structures and the SPs.

        Other optional and additional output files generated by ICEscreen:

        - Annotated Genbank, EMBL, and GFF3 files: those files contain the annotations of the original Genbank file in addition to annotations added by ICEscreen such as ICEscreen signature proteins and mobile elements.
        - All results zipped file: You can download this file locally and unzip it to have the complete output folder generated by the tool, including intermediate processing files. See https://icescreen.migale.inrae.fr for details.
        - Log file (`*_detected_ME.log`): this file contains detailed information on how ICEscreen generated the results, together with the logs of each step of the ICEscreen pipeline.

    ]]>

    </help>
    <!-- No DOI is cited here: the BibTeX entry lists the authors and points readers to the project website for the latest publication. -->
    <citations>
        <citation type="bibtex">@UNPUBLISHED{icescreen,
            author = {Julie Lao and Thomas Lacroix and Gérard Guédon and Charles Coluzzi and Nathalie Leblond-Bourget and Hélène Chiapello},
            title = {See our latest publication at https://icescreen.migale.inrae.fr.}
        }
        </citation>
    </citations>
</tool>