diff searchFile.xml @ 1:6e3a843b6304 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:53:18 +0000
parents
children 21ae0e340d80
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchFile.xml	Mon Jun 05 02:53:18 2023 +0000
@@ -0,0 +1,153 @@
+<tool id="edu.tamu.cpt.proximity.searchFile" name="Search File" version="1.0">
+    <description>Queries a gff3, genbank, fasta, or blastxml file for a user defined set of terms</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package" version="0.10.1">gffutils</requirement>
+    </expand>
+    <command detect_errors="aggressive"><![CDATA[
+## Runs searchFile.py with the chosen term families, optional custom terms, and every input file group.
+python2.7 '$__tool_directory__/searchFile.py'
+    ## An optional multi-select with nothing ticked is falsy; skip the flag instead of passing the string "None".
+    #if $check:
+    --dbaseTerms '$check'
+    #end if
+    #if $term_add.term_selector == "customtxt":
+    --custom_txt '$term_add.custom_txt'
+    #elif $term_add.term_selector == "customfile":
+    --custom_file '$term_add.custom_file'
+    #end if
+    #for $input_select in $input_files:
+        #if $input_select.file_selector.file_select == "gff3selection":
+            --gff3_files #for $gff3_file in $input_select.file_selector.gff3_files:
+                '${gff3_file}' #end for
+        #end if
+        #if $input_select.file_selector.file_select == "gbkselection":
+            --gbk_files #for $gbk_file in $input_select.file_selector.gbk_files:
+                '${gbk_file}' #end for
+        #end if
+        #if $input_select.file_selector.file_select == "faselection":
+            --fa_files #for $fa_file in $input_select.file_selector.fa_files:
+                '${fa_file}' #end for
+        #end if
+        #if $input_select.file_selector.file_select == "blastselection":
+            --blast_files #for $blast_file in $input_select.file_selector.blast_files:
+                '${blast_file}' #end for
+        #end if
+    #end for
+    #if $prox:
+        --prox
+    #end if
+    --output '$output'
+    ]]></command>
+    <inputs>
+        <param label="Using Proximity to Lysis Pipeline?" name="prox" type="boolean" truevalue="--prox" falsevalue="" checked="false" help="required GFF3 input"/>
+        <param name="check" type="select" optional="true" label="Family terms to search" multiple="true" help="Terms available to query from the Lysis-family synonym database (see terms in Shared Data/Lysis family Terms)" display="checkboxes">
+            <option value="endolysins" selected="true">Endolysins</option>
+            <option value="holins" selected="true">Holins and Anti-holins</option>
+            <option value="spanins" selected="true">Spanins</option>
+            <option value="endolysin_domains" selected="true">Endolysin Associated Domains</option>
+            <option value="spanin_domains" selected="true">Spanin Associated Domains</option>
+            <option value="holin_domains" selected="true">Holin Associated Domains</option>
+        </param>
+        <conditional name="term_add"> <!-- Optional extra search terms: none, typed text, or an uploaded file; the command template branches on term_selector. -->
+            <param name="term_selector" type="select" label="Choose if you'd like to add custom terms">
+                <option value="nocustom" selected="false">No Custom Terms</option> <!-- NOTE(review): this option has no matching when element below; confirm the target Galaxy version accepts that. -->
+                <option value="customtxt" selected="false">Custom Text</option>
+                <option value="customfile" selected="false">Custom File</option>
+            </param>
+            <when value="customtxt">
+                <param name="custom_txt" label="Custom Text" optional="true" type="text" area="true" help="Custom text box for search terms, must be separated by newline (enter)"/>
+            </when>
+            <when value="customfile">
+                <param name="custom_file" label="Custom File" optional="true" type="data" format="txt" help="Custom search terms, uploaded via file, where terms must be separated by newline"/>
+            </when>
+        </conditional>
+        <repeat name="input_files" title="Input Files"> <!-- Each entry picks one file type and supplies zero or more datasets of that type; the command template loops over the entries and emits one file-list flag per entry. -->
+            <conditional name="file_selector">
+                <param name="file_select" type="select" label="Choose the type of file(s) you'd like to query">
+                    <option value="gff3selection" selected="false">GFF3</option>
+                    <option value="gbkselection" selected="false">Genbank</option>
+                    <option value="faselection" selected="false">FASTA</option>
+                    <option value="blastselection" selected="false">BLAST-XML</option>
+                </param>
+                <when value="gff3selection">
+                    <param name="gff3_files" label="GFF3 Input" optional="true" multiple="true" type="data" format="gff3"/>
+                </when>
+                <when value="gbkselection">
+                    <param name="gbk_files" label="Genbank Input" optional="true" multiple="true" type="data" format="genbank"/>
+                </when>
+                <when value="faselection">
+                    <param name="fa_files" label="FASTA Input" optional="true" multiple="true" type="data" format="fasta"/>
+                </when>
+                <when value="blastselection">
+                    <param name="blast_files" label="BLAST-xml Input" optional="true" multiple="true" type="data" format="xml"/>
+                </when>
+            </conditional>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data format="txt" name="output" label="termHits"> <!-- Plain-text report by default; the format switches to gff3 when the prox checkbox is ticked. -->
+            <change_format>
+                <when input="prox" value="--prox" format="gff3"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test> <!-- Single test: proximity mode on, spanins family only, one GFF3 input; output is compared against the expected file termHits. -->
+            <param name="prox" value="--prox"/>
+            <param name="check" value="spanins"/>
+            <repeat name="input_files">
+                <conditional name="file_selector">
+                    <param name="file_select" value="gff3selection"/>
+                    <param name="gff3_files" value="lambda_spaninBLAST.gff3"/>
+                </conditional>
+            </repeat>
+            <output name="output" file="termHits"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+INPUT : One or more files of interest (gff3, gbk, fasta, or blast-xml), which are queried with a chosen set of search terms. The search terms can come from the curated lysis synonym database and/or from custom input supplied via file or text box. Additionally, if this tool is being run as part of the proximity to lysis workflow, the selector must be set to true, and only gff3 files will be able to complete the job.
+
+OUTPUT : The "termHits.txt" output file lists, separated by file type, the features or lines that contained hits to a query term. See "File Search Areas" below for the regions of each file type that are searched. If used for the proximity to lysis workflow, the output will instead be a proxHits.gff3 file that can be fed to the next stages of the pipeline.
+
+DBase Family Term Descriptions :
+
+- Endolysins: enzymes that attack one of the structural bonds of the peptidoglycan. The endolysin step requires the hole-forming function of the holins.
+
+- Holins and Anti-holins: small cytoplasmic membrane proteins that control the timing of lysis by forming a lethal membrane hole at a programmed time (programmed into the holin itself).
+
+- Spanins: A periplasm-spanning protein complex that disrupts the outer membrane of the host in phage lysis. These complexes are currently identified as either a unimolecular spanin (u-spanin) or two-component system (i-spanin and o-spanin pair).
+
+- Endolysin Domains: information pulled from PMID: 30873139
+
+- Spanin Domains: information pulled from http://www.ebi.ac.uk/interpro/search/text/spanin/
+
+- Holin Domains: information pulled from http://www.ebi.ac.uk/interpro/search/text/holin/
+
+Additionally, some terms were added by querying QuickGO, located at https://www.ebi.ac.uk/QuickGO/
+
+File Search Areas:
+
+- gff3 : entire row
+
+- genbank : product and note qualifier of each feature
+
+- fasta : header field
+
+- blast-xml : hit description
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">
+                @unpublished{galaxyTools,
+                author = {C. Ross},
+                title = {CPT Galaxy Tools},
+                year = {2020-},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+                }
+            </citation>
+    </citations>
+</tool>