view generate-putative-isp.xml @ 8:b5c1b167df3c draft

planemo upload commit d5c5c50edba8add19aff616c3cbe8bc0c2f36ea5
author cpt
date Fri, 20 Sep 2024 03:03:00 +0000
parents f0ca7edbe202
children 9fa911974de6
line wrap: on
line source

<tool id="edu.tamu.cpt2.spanin.generate-putative-isp" name="ISP candidates" version="1.0">
    <description>constructs a putative list of potential i-spanins from an input genomic FASTA</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="2024.9.11">regex</requirement>
    </expand>
    <!--
        Runs generate-putative-isp.py from the tool directory. The input FASTA
        is the only positional argument; every other input parameter and every
        output dataset path is passed as a named option.
        isp_on, isp_op, isp_ob and isp_og receive the paths of the hidden
        outputs; summary_isp_txt receives the path of the summary_isp output.
    -->
    <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/generate-putative-isp.py'
'$fasta_file'
--strand '$strand'
--switch '$switch'
--isp_on '$isp_on'
--isp_op '$isp_op'
--isp_ob '$isp_ob'
--isp_og '$isp_og'
--isp_min_len '$isp_min_len'
--isp_min_dist '$isp_min_dist'
--isp_max_dist '$isp_max_dist'
--min_tmd_size '$min_tmd_size'
--max_tmd_size '$max_tmd_size'
--putative_isp '$putative_isp'
--summary_isp_txt '$summary_isp'
--putative_isp_gff '$putative_isp_gff'
--isp_max '$isp_max'
--peri_min '$peri_min'
--peri_max '$peri_max'

]]></command>
    <inputs>
        <!--
            User-facing parameters. Each one is referenced by name in the
            command section; names, types and default values must stay in sync
            with it.
        -->
        <param name="strand" type="select" label="Strand Choice">
            <option value="both">both</option>
            <option value="forward">+</option>
            <option value="reverse">-</option>
        </param>
        <param name="fasta_file" type="data" format="fasta" label="Single Genome FASTA"/>
        <!-- Lengths, distances, sizes and residue counts cannot be negative, so every integer below is bounded at 0. -->
        <param name="isp_min_len" type="integer" min="0" value="60" label="i-spanin minimal length"/>
        <param name="isp_max" type="integer" min="0" value="230" label="i-spanin maximum length"/>
        <!-- Only the two documented forms are accepted: the keyword "all" or "start:end". -->
        <param name="switch" type="text" value="all" label="Range Selection" help="Default is all; to check a specific range for a spanin, input integers separated by a colon (eg. 1000:2000)">
            <validator type="regex" message="Enter 'all' or two integers separated by a colon (eg. 1000:2000)">^(all|[0-9]+:[0-9]+)$</validator>
        </param>
        <param name="isp_min_dist" type="integer" min="0" value="10" label="TMD minimal distance from start codon"/>
        <param name="isp_max_dist" type="integer" min="0" value="35" label="TMD maximum distance from start codon" help="Searches for a TMD between TMDmin and TMDmax ie [TMDmin,TMDmax]"/>
        <param name="min_tmd_size" type="integer" min="0" value="10" label="TMD minimal size"/>
        <param name="max_tmd_size" type="integer" min="0" value="25" label="TMD maximum size"/>
        <param name="peri_min" type="integer" min="0" value="16" label="Periplasmic minimal residue amount"/>
        <param name="peri_max" type="integer" min="0" value="206" label="Periplasmic maximum residue amount"/>
    </inputs>
    <outputs>
        <!-- ORF-level files (nucleotide FASTA, protein FASTA, BED, GFF3); flagged hidden so they stay out of the visible history -->
        <data name="isp_on" format="fasta" hidden="true" label="NucSequences.fa"/>
        <data name="isp_op" format="fasta" hidden="true" label="ProtSequences.fa"/>
        <data name="isp_ob" format="bed" hidden="true" label="BED_Output.bed"/>
        <data name="isp_og" format="gff3" hidden="true" label="GFF_Output.gff"/>
        <!-- Candidate i-spanin results shown to the user: sequences, summary text and GFF3 annotations -->
        <data name="putative_isp" format="fasta" label="putative_isp.fa"/>
        <data name="summary_isp" format="txt" label="summary_isp.txt"/>
        <data name="putative_isp_gff" format="gff3" label="putative_isp.gff3"/>
    </outputs>
    <help><![CDATA[

**What it does**

Searches a genome for candidate i-spanins (ISPs), phage proteins involved in outer membrane disruption during lysis of Gram-negative bacterial host cells.

**METHODOLOGY**

Locates ALL potential start sequences, based on the start codons ATG / TTG / GTG (M / L / V). This list is pared down to those within the user-set min/max lengths. From that filtered list, the tool generates a set of files with the ORFs in FASTA (nt and aa), BED, and GFF3 file formats.

With the protein FASTA, the tool next reads in each potential sequence and determines if it has a putative transmembrane domain (TMD) with the following criteria:

    1. Presence of snorkeling Lysine residues surrounded by hydrophobic residues described for TMD below, within the range the user specifies.
    2. A putative transmembrane domain, or TMD, defined as a repeated hydrophobic region within the sequence ([FIWLVMYCATGSP]), of length and position within the range the user inputs.
    3. Length of expected periplasmic region. User defines minimum and maximum thresholds for required number of residues after TMD.

**INPUT** --> Genomic FASTA
*NOTE: This tool only takes a SINGLE genomic fasta. It does not work with multiFASTAs.*

**OUTPUT** --> putative_isp.fa (FASTA) file, putative_isp.gff3, and basic summary statistics as summary_isp.txt.

Protein sequences which passed the above filters are returned as the candidate ISPs.

]]></help>
    <expand macro="citations-crr"/>
</tool>