Mercurial > repos > iuc > icescreen

<tool id="icescreen" name="ICEscreen" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>detects and annotates ICEs (Integrative and Conjugative Elements) and IMEs (Integrative and Mobilizable Elements) in Bacillota genomes.</description>
    <!-- Version tokens: the first one is the wrapped ICEscreen release (also used to pin the icescreen package requirement), the second one is the suffix placed after "+galaxy" in the tool version attribute. -->
    <macros>
        <token name="@TOOL_VERSION@">1.3.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- Cross-reference to the bio.tools entry of this tool. -->
    <xrefs>
        <xref type="bio.tools">icescreen</xref>
    </xrefs>
    <!-- Packages needed at run time: icescreen itself (pinned to the tool version token) and zip, which the command section calls to build the optional archive of all results. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">icescreen</requirement>
        <requirement type="package" version="3.0">zip</requirement>
    </requirements>
    <!-- Command run to obtain the version of the wrapped program. -->
    <version_command>icescreen --version</version_command>
    <!-- Runs ICEscreen on the input dataset, then exports the three main result files and whichever optional files were requested. -->
    <command detect_errors="aggressive"><![CDATA[
        ## The input dataset is linked under a fixed base name; every result
        ## path read below is built from that base name.
        #set $gbname = 'galaxy_genbank'
        #set $result_dir = './tmp_icescreen/ICEscreen_results/results/' + $gbname
        mkdir -p ./tmp_icescreen/source_genbank
        && ln -s '${genome}' ./tmp_icescreen/source_genbank/${gbname}.gb
        ## stdout and stderr of ICEscreen are both captured in a log file, which
        ## is only exported when the log optional output is selected.
        && icescreen
            --galaxy
            --gbdir ./tmp_icescreen/source_genbank
            --outdir ./tmp_icescreen
            --phylum '${taxonomy.phylum}'
            --jobs "\${GALAXY_SLOTS:-4}" > ./tmp_icescreen/ICEscreen_log.txt 2>&1

        ## Main outputs are copied, not moved, so they stay available for the zip step.
        && cat ${result_dir}/icescreen_detection_ME/${gbname}_detected_ME.summary > '${summary}'
        && cat ${result_dir}/icescreen_detection_ME/${gbname}_detected_SP_withMEIds.tsv > '${detected_sp}'
        && cat ${result_dir}/icescreen_detection_ME/${gbname}_detected_ME.tsv > '${detected_me}'

        ## Annotation files are copied as well: moving them out of the result
        ## folder would drop them from the zip when both options are selected.
        #if "output_annotation_genbank" in $additional_output.optional_files:
            && cp ${result_dir}/visualization_files/${gbname}_icescreen.gb '${gbout}'
        #end if
        #if "output_annotation_gff" in $additional_output.optional_files:
            && cp ${result_dir}/visualization_files/${gbname}_icescreen.gff '${gffout}'
        #end if
        #if "output_annotation_embl" in $additional_output.optional_files:
            && cp ${result_dir}/visualization_files/${gbname}_icescreen.embl '${emblout}'
        #end if
        ## The log lives outside the zipped result folder, so it can be moved.
        #if "output_log" in $additional_output.optional_files:
            && mv ./tmp_icescreen/ICEscreen_log.txt '${log}'
        #end if
        ## The directory change is confined to a subshell so that it cannot
        ## affect any relative path used by another step.
        #if "output_zip_all_files" in $additional_output.optional_files:
            && (cd ${result_dir} && zip --quiet -r - -- . > '${outzip}')
        #end if
        ]]>
    </command>
    <inputs>
        <!-- Genbank dataset handed to ICEscreen. -->
        <param name="genome" type="data" format="genbank" label="Input genomes to analyze in Genbank format" help="Multi-genbank files (i.e. gbff) are supported. Each record must include the ORIGIN nucleotide sequence at the end."/>
        <!-- Value passed to the phylum option of the command; a single choice is offered. -->
        <section name="taxonomy" title="Phylum of the genomes to analyse" expanded="True">
            <param name="phylum" type="select" label="Bacillota is the default parameter">
                <option value="bacillota" selected="true">Bacillota</option>
            </param>
        </section>
        <!-- Each selected value enables the output whose filter tests for it and the matching step of the command. -->
        <section name="additional_output" title="Additional output files" expanded="False">
            <param name="optional_files" type="select" label="Make the following optional files available in the Galaxy history" multiple="true" optional="true" display="checkboxes">
                <option value="output_annotation_genbank">Genome annotation in genbank format</option>
                <option value="output_annotation_gff">Genome annotation in gff format</option>
                <option value="output_annotation_embl">Genome annotation in embl format</option>
                <option value="output_zip_all_files">Zip of all final and intermediate results</option>
                <option value="output_log">ICEscreen log</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Always produced. -->
        <data name="detected_sp" format="tabular" label="${tool.name} on ${on_string}: Signature Proteins table"/>
        <data name="detected_me" format="tabular" label="${tool.name} on ${on_string}: ICEs/IMEs table"/>
        <data name="summary" format="txt" label="${tool.name} on ${on_string}: results summary"/>
        <!-- Produced only when the matching value is selected in the optional files parameter; the first half of each filter guards against an empty selection. -->
        <data name="gbout" format="genbank" label="${tool.name} on ${on_string}: annotated genbank">
            <filter>additional_output['optional_files'] and "output_annotation_genbank" in additional_output['optional_files']</filter>
        </data>
        <data name="gffout" format="gff3" label="${tool.name} on ${on_string}: annotated GFF3">
            <filter>additional_output['optional_files'] and "output_annotation_gff" in additional_output['optional_files']</filter>
        </data>
        <data name="emblout" format="embl" label="${tool.name} on ${on_string}: annotated EMBL">
            <filter>additional_output['optional_files'] and "output_annotation_embl" in additional_output['optional_files']</filter>
        </data>
        <data name="outzip" format="zip" label="${tool.name} on ${on_string}: all results zipped">
            <filter>additional_output['optional_files'] and "output_zip_all_files" in additional_output['optional_files']</filter>
        </data>
        <data name="log" format="txt" label="${tool.name} on ${on_string}: log file">
            <filter>additional_output['optional_files'] and "output_log" in additional_output['optional_files']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default run, single-record genbank: only the three main outputs. -->
        <test expect_num_outputs="3">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <output name="summary" file="NC_004668_137848_164286_detected_ME.summary" ftype="txt" />
            <output name="detected_me" file="NC_004668_137848_164286_detected_ME.tsv" ftype="tabular" />
            <output name="detected_sp" >
                <assert_contents>
                    <has_text text="ICE_IME_id" />
                    <has_text text="WP_002359295" />
                    <has_text text="VirB4" />
                    <has_n_columns n="54" />
                    <has_n_lines n="5" />
                </assert_contents>
            </output>
        </test>
        <!-- Optional annotated genbank output. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_annotation_genbank" />
            <output name="gbout" file="NC_004668_137848_164286_icescreen.gb" ftype="genbank" />
        </test>
        <!-- Optional annotated GFF3 output. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_annotation_gff" />
            <output name="gffout" file="NC_004668_137848_164286_icescreen.gff" ftype="gff3" />
        </test>
        <!-- Optional annotated EMBL output. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_annotation_embl" />
            <output name="emblout" file="NC_004668_137848_164286_icescreen.embl" ftype="embl" />
        </test>
        <!-- Optional zip output. The member path is a regular expression, hence the escaped dot. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_zip_all_files" />
            <output name="outzip" >
                <assert_contents>
                    <has_archive_member path=".*_detected_SP_withMEIds\.tsv"><has_text text="ICE_IME_id" /></has_archive_member>
                </assert_contents>
            </output>
        </test>
        <!-- Optional log output, phylum left at its default. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="optional_files" value="output_log" />
            <output name="log" >
                <assert_contents>
                    <has_text text="bacillota" />
                    <has_text text="Building DAG of jobs" />
                    <has_text text="Complete log" />
                    <not_has_text text="Error" />
                </assert_contents>
            </output>
        </test>
        <!-- Optional log output, phylum set explicitly. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="phylum" value="bacillota" />
            <param name="optional_files" value="output_log" />
            <output name="log" >
                <assert_contents>
                    <has_text text="bacillota" />
                    <not_has_text text="Error" />
                </assert_contents>
            </output>
        </test>
        <!-- Two optional outputs selected together. -->
        <test expect_num_outputs="5">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank" />
            <param name="phylum" value="bacillota" />
            <param name="optional_files" value="output_zip_all_files,output_log" />
            <output name="log" >
                <assert_contents>
                    <has_text text="bacillota" />
                    <not_has_text text="Error" />
                </assert_contents>
            </output>
        </test>
        <!-- Default run on a second genbank test file. -->
        <test expect_num_outputs="3">
            <param name="genome" value="genbank/NC_013798_298468_322494_NC_020450_643089_661957.gb" ftype="genbank" />
            <output name="summary" file="NC_013798_298468_322494_NC_020450_643089_661957_detected_ME.summary" ftype="txt" />
            <output name="detected_me" file="NC_013798_298468_322494_NC_020450_643089_661957_detected_ME.tsv" ftype="tabular" />
            <output name="detected_sp" >
                <assert_contents>
                    <has_text text="ICE_IME_id" />
                    <has_text text="WP_044555479.1" />
                    <has_text text="WP_015426013.1" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
        .. class:: warningmark

        ICEscreen requires input files in genbank format. Multigenbank files (i.e. gbff files featuring multiple genome records back to back) are supported. Each Genbank record must include the ORIGIN nucleotide sequence.

-----

        **What it does**

        ICEscreen is a bioinformatic pipeline for the detection and annotation of ICEs (Integrative and Conjugative Elements) and IMEs (Integrative and Mobilizable Elements) in Bacillota genomes. The full documentation is available at https://icescreen.migale.inrae.fr. The forge of the project is accessible at https://forgemia.inra.fr/ices_imes_analysis/icescreen.

        **Main features of ICEscreen**

        - Detection of signature proteins (SPs) of ICEs/IMEs by using blastP on a curated resource. BlastP allows for an accurate assignment of hits to a given ICE/IME superfamily or family. The curated resource was derived from an analysis of over 120 ICEs and IMEs in Streptococcus genomes by the DINAMIC lab.
        - Detection of distant homologs of SPs by using HMM profiles of ICEs/IMEs protein families. The HMM profiles have been either imported from trusted resources or created and curated when needed.
        - Detection of the ICE/IME structures: ICEscreen groups together SPs that belong to the same ICE/IME structures to the best of its ability.
        - Delimitation of the elements at the gene or nucleotide level is not yet implemented and still needs manual curation.

        **Output files**

        There are 3 main output results files generated by ICEscreen:

        - Detected Signature Proteins table (`*_detected_SP_withMEIds.tsv`): list of the signature proteins detected by the tool and their possible assignment to an ICE or IME element. Each line represents a signature protein detected by ICEscreen.
        - Detected ICEs/IMEs table (`*_detected_ME.tsv`): list of the ICE and IME elements detected by the tool, including information about the signature proteins they contain. Information in this file is complementary to the `*_detected_SP_withMEIds.tsv` file above, with each line representing an ICE / IME structure instead of a signature protein.
        - Results summary (`*_detected_ME.summary`): this file summarizes the main parameters and statistics regarding the ICE / IME structures and the SPs.

        Other optional and additional output files generated by ICEscreen:

        - Annotated Genbank, EMBL, and GFF3 files: these files contain the annotations of the original Genbank file in addition to annotations added by ICEscreen such as ICEscreen signature proteins and mobile elements.
        - All results zipped file: You can download this file locally and unzip it to have the complete output folder generated by the tool, including intermediate processing files. See https://icescreen.migale.inrae.fr for details.
        - Log file (`*_detected_ME.log`): this file contains the internal details used by ICEscreen to generate the results, as well as the logs of each step of the ICEscreen pipeline.

    ]]>

    </help>
    <citations>
        <!-- Placeholder entry: it points readers to the project website for the latest publication. -->
        <citation type="bibtex">@UNPUBLISHED{icescreen,
            author = {Julie Lao and Thomas Lacroix and Gérard Guédon and Charles Coluzzi and Nathalie Leblond-Bourget and Hélène Chiapello},
            title = {See our latest publication at https://icescreen.migale.inrae.fr.}
        }
        </citation>
    </citations>
</tool>
author	iuc
date	Fri, 09 Feb 2024 21:26:42 +0000
parents	d45c95f382d3
children