Mercurial > repos > cpt > cpt_putative_isp
diff generate-putative-isp.xml @ 1:4e02e6e9e77d draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:51:35 +0000 |
parents | |
children | 0a02ef22ce17 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate-putative-isp.xml Mon Jun 05 02:51:35 2023 +0000 @@ -0,0 +1,81 @@ +<tool id="edu.tamu.cpt2.spanin.generate-putative-isp" name="ISP candidates" version="1.0"> + <description>constructs a putative list of potential i-spanin from an input genomic FASTA</description> + <macros> + <import>macros.xml</import> + <import>cpt-macros.xml</import> + </macros> + <expand macro="requirements"> + </expand> + <command detect_errors="aggressive"><![CDATA[ +python '$__tool_directory__/generate-putative-isp.py' +'$fasta_file' +--strand '$strand' +--switch '$switch' +--isp_on '$isp_on' +--isp_op '$isp_op' +--isp_ob '$isp_ob' +--isp_og '$isp_og' +--isp_min_len '$isp_min_len' +--isp_min_dist '$isp_min_dist' +--isp_max_dist '$isp_max_dist' +--min_tmd_size '$min_tmd_size' +--max_tmd_size '$max_tmd_size' +--putative_isp '$putative_isp' +--summary_isp_txt '$summary_isp' +--putative_isp_gff '$putative_isp_gff' +--isp_max '$isp_max' +--peri_min '$peri_min' +--peri_max '$peri_max' + +]]></command> + <inputs> + <param type="select" label="Strand Choice" name="strand"> + <option value="both">both</option> + <option value="forward">+</option> + <option value="reverse">-</option> + </param> + <param label="Single Genome FASTA" name="fasta_file" type="data" format="fasta"/> + <param label="i-spanin minimal length" name="isp_min_len" type="integer" value="60"/> + <param label="i-spanin maximum length" name="isp_max" type="integer" value="230"/> + <param label="Range Selection; default is all; for a specific range to check for a spanin input integers separated by a colon (eg. 1000:2000)" type="text" name="switch" value="all"/> + <param label="TMD minimal distance from start codon" name="isp_min_dist" type="integer" value="10"/> + <param label="TMD maximum distance from start codon" name="isp_max_dist" type="integer" value="35" help="Searches for a TMD between TMDmin and TMDmax ie [TMDmin,TMDmax]"/> + <param label="TMD minimal size" name="min_tmd_size" type="integer" value="10"/> + <param label="TMD maximum size" name="max_tmd_size" type="integer" value="25"/> + <param label="Periplasmic minimal residue amount" name="peri_min" type="integer" value="16"/> + <param label="Periplasmic maximum residue amount" name="peri_max" type="integer" value="206"/> + </inputs> + <outputs> + <data format="fasta" name="isp_on" label="NucSequences.fa" hidden="true"/> + <data format="fasta" name="isp_op" label="ProtSequences.fa" hidden="true"/> + <data format="bed" name="isp_ob" label="BED_Output.bed" hidden="true"/> + <data format="gff3" name="isp_og" label="GFF_Output.gff" hidden="true"/> + <data format="fasta" name="putative_isp" label="putative_isp.fa"/> + <data format="txt" name="summary_isp" label="summary_isp.txt"/> + <data format="gff3" name="putative_isp_gff" label="putative_isp.gff3"/> + </outputs> + <help><![CDATA[ + +**What it does** +Searches a genome for candidate i-spanins (ISPs), a phage protein involved in outer membrane disruption during Gram-negative bacterial host cell lysis. + +**METHODOLOGY** + +Locates ALL potential start sequences, based on TTG / ATG / GTG (M / L / V). This list is pared down to those within the user-set min/max lengths. That filtered list generates a set of files with the ORFs in FASTA (nt and aa), BED, and GFF3 file formats. + +With the protein FASTA, the tool next reads in each potential sequence and determines if it has a putative transmembrane domain (TMD) with the following criteria: + + 1. Presence of snorkeling Lysine residues surrounded by hydrophobic residues described for TMD below, within the range the user specifies. + 2. A putative transmembrane domain, or TMD, defined as a repeated hydrophobic region within the sequence ([FIWLVMYCATGSP]), of length and position within the range the user inputs. + 3. Length of expected periplasmic region. User defines minimum and maximum thresholds for required number of residues after TMD. + +**INPUT** --> Genomic FASTA +*NOTE: This tool only takes a SINGLE genomic fasta. It does not work with multiFASTAs.* + +**OUTPUT** --> putative_isp.fa (FASTA) file, putative_isp.gff3, and basic summary statistics as summary_isp.txt. + +Protein sequences which passed the above filters are returned as the candidate ISPs. + +]]></help> + <expand macro="citations-crr"/> +</tool>