view generate-putative-isp.xml @ 2:0a02ef22ce17 draft default tip

planemo upload commit f33bdf952d796c5d7a240b132af3c4cbd102decc
author cpt
date Fri, 05 Jan 2024 05:56:25 +0000
parents 4e02e6e9e77d
children
line wrap: on
line source

<tool id="edu.tamu.cpt2.spanin.generate-putative-isp" name="ISP candidates" version="1.0">
    <description>constructs a putative list of potential i-spanin from an input genomic FASTA</description>
    <!-- Macro definitions are imported from macros.xml; the macros expanded in
         this file (such as "requirements" below) are expected to be defined
         there (macros.xml is not part of this file, so verify against it). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expanded with no child content, so the self-closing form is used. -->
    <expand macro="requirements"/>
    <!-- Command template: runs generate-putative-isp.py from the tool directory.
         The genome FASTA is the only positional argument; every other input
         parameter and every output dataset path is passed as a single-quoted
         named option. The options isp_on, isp_op, isp_ob, isp_og, putative_isp
         and putative_isp_gff receive the output datasets of the same name; the
         summary_isp_txt option receives the "summary_isp" output.
         NOTE(review): per the Galaxy tool schema docs, detect_errors="aggressive"
         fails the job on a non-zero exit code and also when "error:" or
         "exception:" appears in the tool's output; confirm against the
         Galaxy version in use. -->
    <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/generate-putative-isp.py'
'$fasta_file'
--strand '$strand'
--switch '$switch'
--isp_on '$isp_on'
--isp_op '$isp_op'
--isp_ob '$isp_ob'
--isp_og '$isp_og'
--isp_min_len '$isp_min_len'
--isp_min_dist '$isp_min_dist'
--isp_max_dist '$isp_max_dist'
--min_tmd_size '$min_tmd_size'
--max_tmd_size '$max_tmd_size'
--putative_isp '$putative_isp'
--summary_isp_txt '$summary_isp'
--putative_isp_gff '$putative_isp_gff'
--isp_max '$isp_max'
--peri_min '$peri_min'
--peri_max '$peri_max'

]]></command>
    <!-- User-facing parameters. Names, types, labels and default values are the
         tool's interface and are unchanged. Added validation only:
           * every integer parameter carries min="0", because negative lengths,
             distances, sizes and residue counts are meaningless and were
             previously passed straight through to the script;
           * "switch" carries a regex validator so that only the two forms
             described in its label ("all" or "start:end") are accepted. -->
    <inputs>
        <!-- Strand(s) to search; the first option ("both") is the default. -->
        <param type="select" label="Strand Choice" name="strand">
            <option value="both">both</option>
            <option value="forward">+</option>
            <option value="reverse">-</option>
        </param>
        <param label="Single Genome FASTA" name="fasta_file" type="data" format="fasta"/>
        <!-- Candidate length bounds. -->
        <param label="i-spanin minimal length" name="isp_min_len" type="integer" min="0" value="60"/>
        <param label="i-spanin maximum length" name="isp_max" type="integer" min="0" value="230"/>
        <!-- Optional restriction of the search to a coordinate range. -->
        <param label="Range Selection; default is all; for a specific range to check for a spanin input integers separated by a colon (eg. 1000:2000)" type="text" name="switch" value="all">
            <validator type="regex" message="Enter 'all' or two integers separated by a colon, e.g. 1000:2000">^(all|[0-9]+:[0-9]+)$</validator>
        </param>
        <!-- Transmembrane domain (TMD) position and size bounds. -->
        <param label="TMD minimal distance from start codon" name="isp_min_dist" type="integer" min="0" value="10"/>
        <param label="TMD maximum distance from start codon" name="isp_max_dist" type="integer" min="0" value="35" help="Searches for a TMD between TMDmin and TMDmax ie [TMDmin,TMDmax]"/>
        <param label="TMD minimal size" name="min_tmd_size" type="integer" min="0" value="10"/>
        <param label="TMD maximum size" name="max_tmd_size" type="integer" min="0" value="25"/>
        <!-- Bounds on the number of residues after the TMD (periplasmic region). -->
        <param label="Periplasmic minimal residue amount" name="peri_min" type="integer" min="0" value="16"/>
        <param label="Periplasmic maximum residue amount" name="peri_max" type="integer" min="0" value="206"/>
    </inputs>
    <outputs>
        <!-- Intermediate ORF files (nucleotide FASTA, protein FASTA, BED, GFF3);
             all four are marked hidden. -->
        <data name="isp_on" format="fasta" label="NucSequences.fa" hidden="true"/>
        <data name="isp_op" format="fasta" label="ProtSequences.fa" hidden="true"/>
        <data name="isp_ob" format="bed" label="BED_Output.bed" hidden="true"/>
        <data name="isp_og" format="gff3" label="GFF_Output.gff" hidden="true"/>
        <!-- Final results: candidate sequences, summary statistics, annotations. -->
        <data name="putative_isp" format="fasta" label="putative_isp.fa"/>
        <data name="summary_isp" format="txt" label="summary_isp.txt"/>
        <data name="putative_isp_gff" format="gff3" label="putative_isp.gff3"/>
    </outputs>
    <help><![CDATA[

**What it does**

Searches a genome for candidate i-spanins (ISPs), phage proteins involved in outer membrane disruption during Gram-negative bacterial host cell lysis.

**METHODOLOGY**

Locates ALL potential start sequences, based on TTG / ATG / GTG (L / M / V). This list is pared down to those within the user-set min/max lengths. That filtered list generates a set of files with the ORFs in FASTA (nt and aa), BED, and GFF3 file formats.

With the protein FASTA, the tool next reads in each potential sequence and determines if it has a putative transmembrane domain (TMD) with the following criteria:

    1. Presence of snorkeling Lysine residues surrounded by hydrophobic residues described for TMD below, within the range the user specifies.
    2. A putative transmembrane domain, or TMD, defined as a repeated hydrophobic region within the sequence ([FIWLVMYCATGSP]), of length and position within the range the user inputs.
    3. Length of expected periplasmic region. User defines minimum and maximum thresholds for required number of residues after TMD.

**INPUT** --> Genomic FASTA
*NOTE: This tool only takes a SINGLE genomic fasta. It does not work with multiFASTAs.*

**OUTPUT** --> putative_isp.fa (FASTA) file, putative_isp.gff3, and basic summary statistics as summary_isp.txt.

Protein sequences which passed the above filters are returned as the candidate ISPs.

]]></help>
    <expand macro="citations-crr"/>
</tool>