annotate generate-putative-isp.xml @ 9:9fa911974de6 draft default tip

planemo upload commit a4912b5d710e0082b764a8c90bf184078e74d83d-dirty
author cpt
date Fri, 20 Sep 2024 03:38:42 +0000
parents b5c1b167df3c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
1 <tool id="edu.tamu.cpt2.spanin.generate-putative-isp" name="ISP candidates" version="1.0">
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
20 <description>constructs a list of putative i-spanins from an input genomic FASTA</description>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
3 <macros>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
4 <import>macros.xml</import>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
5 </macros>
9
9fa911974de6 planemo upload commit a4912b5d710e0082b764a8c90bf184078e74d83d-dirty
cpt
parents: 8
diff changeset
6 <requirements>
9fa911974de6 planemo upload commit a4912b5d710e0082b764a8c90bf184078e74d83d-dirty
cpt
parents: 8
diff changeset
7 <requirement type="package" version="3.9">python</requirement>
9fa911974de6 planemo upload commit a4912b5d710e0082b764a8c90bf184078e74d83d-dirty
cpt
parents: 8
diff changeset
8 <requirement type="package" version="1.84">biopython</requirement>
8
b5c1b167df3c planemo upload commit d5c5c50edba8add19aff616c3cbe8bc0c2f36ea5
cpt
parents: 7
diff changeset
9 <requirement type="package" version="2024.9.11">regex</requirement>
9
9fa911974de6 planemo upload commit a4912b5d710e0082b764a8c90bf184078e74d83d-dirty
cpt
parents: 8
diff changeset
10 </requirements>
1
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
11 <command detect_errors="aggressive"><![CDATA[
7
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
12 python '$__tool_directory__/generate-putative-isp.py'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
13 '$fasta_file'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
14 --strand '$strand'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
15 --switch '$switch'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
16 --isp_on '$isp_on'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
17 --isp_op '$isp_op'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
18 --isp_ob '$isp_ob'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
19 --isp_og '$isp_og'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
20 --isp_min_len '$isp_min_len'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
21 --isp_min_dist '$isp_min_dist'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
22 --isp_max_dist '$isp_max_dist'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
23 --min_tmd_size '$min_tmd_size'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
24 --max_tmd_size '$max_tmd_size'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
25 --putative_isp '$putative_isp'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
26 --summary_isp_txt '$summary_isp'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
27 --putative_isp_gff '$putative_isp_gff'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
28 --isp_max '$isp_max'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
29 --peri_min '$peri_min'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
30 --peri_max '$peri_max'
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
31
f0ca7edbe202 planemo upload commit ad5355b899dc8f8300c206522e2a65d49809fb73
cpt
parents: 6
diff changeset
32 ]]></command>
1
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
33 <inputs>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
34 <param type="select" label="Strand Choice" name="strand">
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
35 <option value="both">both</option>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
36 <option value="forward">+</option>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
37 <option value="reverse">-</option>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
38 </param>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
39 <param label="Single Genome FASTA" name="fasta_file" type="data" format="fasta"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
40 <param label="i-spanin minimal length" name="isp_min_len" type="integer" value="60"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
41 <param label="i-spanin maximum length" name="isp_max" type="integer" value="230"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
42 <param label="Range Selection; default is all. To check only a specific range for a spanin, input two integers separated by a colon (e.g. 1000:2000)" type="text" name="switch" value="all"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
43 <param label="TMD minimal distance from start codon" name="isp_min_dist" type="integer" value="10"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
44 <param label="TMD maximum distance from start codon" name="isp_max_dist" type="integer" value="35" help="Searches for a TMD between TMDmin and TMDmax, i.e. within [TMDmin, TMDmax]"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
45 <param label="TMD minimal size" name="min_tmd_size" type="integer" value="10"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
46 <param label="TMD maximum size" name="max_tmd_size" type="integer" value="25"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
47 <param label="Periplasmic minimal residue amount" name="peri_min" type="integer" value="16"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
48 <param label="Periplasmic maximum residue amount" name="peri_max" type="integer" value="206"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
49 </inputs>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
50 <outputs>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
51 <data format="fasta" name="isp_on" label="NucSequences.fa" hidden="true"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
52 <data format="fasta" name="isp_op" label="ProtSequences.fa" hidden="true"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
53 <data format="bed" name="isp_ob" label="BED_Output.bed" hidden="true"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
54 <data format="gff3" name="isp_og" label="GFF_Output.gff" hidden="true"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
55 <data format="fasta" name="putative_isp" label="putative_isp.fa"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
56 <data format="txt" name="summary_isp" label="summary_isp.txt"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
57 <data format="gff3" name="putative_isp_gff" label="putative_isp.gff3"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
58 </outputs>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
59 <help><![CDATA[
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
60
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
61 **What it does**
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
62 Searches a genome for candidate i-spanins (ISPs), phage proteins involved in outer membrane disruption during Gram-negative bacterial host cell lysis.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
63
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
64 **METHODOLOGY**
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
65
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
66 Locates ALL potential start sequences, based on the start codons ATG / TTG / GTG (M / L / V). This list is pared down to those within the user-set min/max lengths. That filtered list generates a set of files with the ORFs in FASTA (nt and aa), BED, and GFF3 file formats.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
67
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
68 With the protein FASTA, the tool next reads in each potential sequence and determines if it has a putative transmembrane domain (TMD) with the following criteria:
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
69
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
70 1. Presence of snorkeling Lysine residues surrounded by hydrophobic residues described for TMD below, within the range the user specifies.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
71 2. A putative transmembrane domain, or TMD, defined as a repeated hydrophobic region within the sequence ([FIWLVMYCATGSP]), of length and position within the range the user inputs.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
72 3. Length of expected periplasmic region. User defines minimum and maximum thresholds for required number of residues after TMD.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
73
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
74 **INPUT** --> Genomic FASTA
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
75 *NOTE: This tool only takes a SINGLE genomic fasta. It does not work with multiFASTAs.*
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
76
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
77 **OUTPUT** --> putative_isp.fa (FASTA) file, putative_isp.gff3, and basic summary statistics as summary_isp.txt.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
78
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
79 Protein sequences which passed the above filters are returned as the candidate ISPs.
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
80
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
81 ]]></help>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
82 <expand macro="citations-crr"/>
4e02e6e9e77d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
83 </tool>