diff blat.xml @ 10:c449963debd5 draft

planemo upload commit c5f669e334bf65c1158855bd1eef88df78a8028c
author iuc
date Mon, 21 Nov 2022 11:12:14 +0000
parents a9d863528f48
children 2a89f630fa85
line wrap: on
line diff
--- a/blat.xml	Tue Jul 10 13:04:09 2018 -0400
+++ b/blat.xml	Mon Nov 21 11:12:14 2022 +0000
@@ -1,131 +1,195 @@
-<?xml version="1.0"?>
-<tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="1.2">
-    <description>Standalone blat sequence search command line tool</description>
+<tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>BLAST-like sequence alignment tool</description>
+    <macros>
+        <token name="@TOOL_VERSION@">377</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">blat</xref>
+    </xrefs>
     <requirements>
-      <requirement type="package" version="1.0">ucsc_tools_340_for_BLAT</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">ucsc-blat</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-      #if str($reference_source.reference_source_selector) == "history":
-            #set $reference_fasta_filename = "localref.fa" 
-            ln -s '${reference_source.database}' '${reference_fasta_filename}' &&
-      #else:
-            #set $reference_fasta_filename = str($reference_source.database.fields.path)
-      #end if
-      blat 
-            #if $noHead == "yes"
-            -noHead
-            #end if
-            -q=$query_type
-            -t=$database_type
-            #if $mask_type.mask == "file.out":
-                  -mask=$mask_type.mask_file
-            #else:
-                  -mask=$mask_type.mask
-            #end if
-            '$reference_fasta_filename'
-            '${query}'
-            output
-      && sort -k 10,10 -k 12,12n output > '${output_sorted}'
-]]></command>
-      <inputs>
-            <conditional name="reference_source">
-                  <param name="reference_source_selector" type="select" label="Choose the source for the database">
-                        <option value="cached">Locally cached</option>
-                        <option value="history">History</option>
-                  </param>
-                  <when value="cached">
-                        <param name="database" type="select" label="Select database">
-                              <options from_data_table="all_fasta">
-                                    <filter type="sort_by" column="2" />
-                              </options>
-                              <validator type="no_options" message="A built-in database is not available" />
-                        </param>
-                  </when>
-                  <when value="history">
-                        <param name="database" type="data" format="fasta,twobit" label="Using database file, either a .fa, .nib or .2bit file" />
-                  </when>
-            </conditional>
-            <param type="data" name="query" format="fasta,twobit" label="Query data, either a .fa, .nib or .2bit file"/>
-            <param type="select" name="database_type" format="text" multiple="false" label="database type" help="Choose your database type, the default is dnax" argument="-t">
-                  <option value="dna">dna - DNA sequence</option>
-                  <option value="prot">prot - protein sequence</option>
-                  <option value="dnax" selected="true">dnax - DNA sequence translated in six frames to protein</option>
+    #if str($reference_source.reference_source_selector) == "history":
+        ## blat depends on file extension
+        #if $reference_source.database.is_of_type("fasta"):
+            #set $reference_fasta_filename = "localref.fa"
+        #elif $reference_source.database.is_of_type("twobit"):
+            #set $reference_fasta_filename = "localref.2bit"
+        #else
+            #set $reference_fasta_filename = "localref"
+        #end if
+        ln -s '$reference_source.database' '$reference_fasta_filename' &&
+    #else:
+        #set $reference_fasta_filename = str($reference_source.database.fields.path)
+    #end if
+
+    blat
+        -q=$query_type
+        -t=$database_type
+        $oneOff
+        #if str($minScore)
+            -minScore=$minScore
+        #end if
+        -maxGap=$maxGap
+        #if str($repMatch)
+            -repMatch=$repMatch
+        #end if
+        #if $mask_type.mask == "file.out":
+            -mask='$mask_type.mask_file'
+        #else:
+            -mask=$mask_type.mask
+        #end if
+        #if str($dots)
+            -dots=$dots
+        #end if
+        $trimT
+        $noTrimA
+        $trimHardA
+        $fastMap
+        $fine
+        #if str($maxIntron)
+            -maxIntron=$maxIntron
+        #end if
+        $extendThroughN
+        '$reference_fasta_filename'
+        '$query'
+        -out=$out
+        '$output'
+    ]]></command>
+    <inputs>
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Choose the source for the database">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
             </param>
-            <param type="select" name="query_type" format="text" multiple="false" label="query type" help="Choose your query type, the default is rnax" argument="-q">
-                  <option value="dna">dna - DNA sequence </option>
-                  <option value="rna">rna - RNA sequence</option>
-                  <option value="prot">prot - protein sequence</option>
-                  <option value="dnax">dnax - DNA sequence translated in six frames to protein</option>
-                  <option value="rnax" selected="true">rnax - DNA sequence translated in three frames to protein</option>
-            </param>
-            <param type="select" name="noHead" format="text" label="Suppresses .psl header (so it's just a tab-separated file)." >
-                  <option value="yes" selected="true">Yes, suppresses .psl header</option>
-                  <option value="no">No, do not suppresses .psl header</option>
+            <when value="cached">
+                <param name="database" type="select" label="Select database">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                    <validator type="no_options" message="A built-in database is not available" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="database" type="data" format="fasta, twobit" label="Using database file, either a .fa, .nib or .2bit file" />
+            </when>
+        </conditional>
+        <param name="query" type="data" format="fasta, twobit" label="Query data, either a .fa, .nib or .2bit file"/>
+        <param argument="-t" name="database_type" type="select" format="txt" multiple="false" label="database type" help="Choose your database type, the default is dnax">
+            <option value="dna">dna - DNA sequence</option>
+            <option value="prot">prot - protein sequence</option>
+            <option value="dnax" selected="true">dnax - DNA sequence translated in six frames to protein</option>
+        </param>
+        <param argument="-q" name="query_type" type="select" format="txt" multiple="false" label="query type" help="Choose your query type, the default is rnax">
+            <option value="dna">dna - DNA sequence </option>
+            <option value="rna">rna - RNA sequence</option>
+            <option value="prot">prot - protein sequence</option>
+            <option value="dnax">dnax - DNA sequence translated in six frames to protein</option>
+            <option value="rnax" selected="true">rnax - DNA sequence translated in three frames to protein</option>
+        </param>
+        <param argument="-oneOff" type="boolean" truevalue="-oneOff=1" falsevalue="" label="If set, this allows one mismatch in tile and still triggers an alignments" />
+        <param argument="-minScore" type="integer" value="30" label="Minimum score" help="It is the matches minus the mismatches minus some sort of gap penalty" />
+        <param argument="-maxGap" type="integer" value="2" min="0" max="3" label="Maximum gap between tiles in a clump" help="Usually set from 0 to 3. Only relevant for minMatch > 1" />
+        <param argument="-repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" />
+        <conditional name="mask_type">
+            <param  argument="-mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches. Default is lower">
+                <option value="lower" selected="true">lower - mask out lower-cased sequence</option>
+                <option value="upper">upper - mask out upper-cased sequence</option>
+                <option value="out">out - mask according to database.out RepeatMasker .out file</option>
+                <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
             </param>
-            <conditional name="mask_type">
-                  <param name="mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region
-                  but may extend through it in nucleotide searches.  Masked areas
-                  are ignored entirely in protein or translated searches. Default is lower" argument="-mask">
-                        <option value="lower" selected="true">lower - mask out lower-cased sequence</option>
-                        <option value="upper">upper - mask out upper-cased sequence</option>
-                        <option value="out">out - mask according to database.out RepeatMasker .out file</option>
-                        <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
-                  </param>
-                  <when value="lower" />
-                  <when value="upper" />
-                  <when value="out" />
-                  <when value="file.out">
-                        <param name="mask_file" type="data" format="txt" label="RepeatMasker file.out" />
-                  </when>  
-            </conditional>
-      </inputs>
-      <outputs>
-            <data format="psl" name="output_sorted" label="${tool.name} on ${on_string}"></data>
-      </outputs>
-  <tests>
-      <!-- test on query of GenBank RefSeq records for Gallus gallus and database of Amazona vittata -->
-      <test>
+            <when value="lower" />
+            <when value="upper" />
+            <when value="out" />
+            <when value="file.out">
+                <param name="mask_file" type="data" format="txt" label="RepeatMasker file.out" />
+            </when>
+        </conditional>
+        <param argument="-dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log" help="Dots show program's progress" />
+        <param argument="-trimT" type="boolean" truevalue="-trimT" falsevalue="" label="Trim leading poly-T" />
+        <param argument="-noTrimA" type="boolean" truevalue="-noTrimA" falsevalue="" label="Don't trim trailing poly-A" />
+        <param argument="-trimHardA" type="boolean" truevalue="-trimHardA" falsevalue="" label="Remove poly-A tail from qSize and alignments in .psl output" />
+        <param argument="-fastMap" type="boolean" truevalue="-fastMap" falsevalue="" label="Run for fast DNA/DNA remapping" help="It does not allow introns and require high %ID. Query sizes must not exceed 5000" />
+        <param argument="-fine" type="boolean" truevalue="-fine" falsevalue="" label="Refine search for small initial and terminal exons" help="For high-quality mRNAs. Not recommended for ESTs" />
+        <param argument="-maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size" />
+        <param argument="-extendThroughN" type="boolean" truevalue="-extendThroughN" falsevalue="" label="Allow extension of alignment through large blocks of N's" />
+        <param name="out" type="select" label="Select output file format (-out)">
+            <option value="psl">Tab-separated format, no sequence (psl)</option>
+            <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option>
+            <option value="axt">Blastz-associated axt format (axt)</option>
+            <option value="maf">Multiz-associated maf format (maf)</option>
+            <option value="sim4">Similar to sim4 format (sim4)</option>
+            <option value="wublast">Similar to WU-BLAST format (wublast)</option>
+            <option value="blast">Similar to NCBI BLAST format (blast)</option>
+            <option value="blast8">NCBI BLAST tabular format (blast8)</option>
+            <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}">
+            <change_format>
+                <when input="out" value="axt" format="axt" />
+                <when input="out" value="maf" format="maf" />
+                <when input="out" value="sim4" format="txt" />
+                <when input="out" value="wublast" format="tabular" />
+                <when input="out" value="blast" format="tabular" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- test on query of GenBank RefSeq records for Gallus gallus and database of Amazona vittata -->
+        <test>
             <param name="reference_source_selector" value="history" />
             <param name="database" value="amaVit1_Gallus/amaVit1.fa" />
             <param name="query" value="amaVit1_Gallus/Gallus_gallus_RefSeq.fa" />
-            <param name="database_type" value="dnax" />    
-            <param name="query_type" value="rnax" />     
-            <param name="noHead" value="yes" />
+            <param name="database_type" value="dnax" />
+            <param name="query_type" value="rnax" />
             <param name="mask" value="lower" />
-            <output name="output_sorted" value="amaVit1_Gallus/amaVit1_Gallus_gallus.psl" />
-      </test>
-      <!-- test on query of partial mRNA of Drosophila melanogaster and the database of Drosophila biamipes dot chromosome -->
-      <test>
+            <param name="out" value="psl -noHead" />
+            <output name="output" value="amaVit1_Gallus/amaVit1_Gallus_gallus_sorted.psl"  sort="true"/>
+        </test>
+        <!-- test on query of partial mRNA of Drosophila melanogaster and the database of Drosophila biamipes dot chromosome -->
+        <test>
             <param name="reference_source_selector" value="history" />
             <param name="database" value="dbia3/dbia3.fa" />
             <param name="query" value="dbia3/dmel-transcript.fa" />
-            <param name="database_type" value="dnax" />    
-            <param name="query_type" value="rnax" />     
-            <param name="noHead" value="yes" />
+            <param name="database_type" value="dnax" />
+            <param name="query_type" value="rnax" />
             <param name="mask" value="lower" />
-            <output name="output_sorted" value="dbia3/dbia3.sorted.psl" />
-      </test>
-      <!-- test on the database masked by repeat masker -->
-      <test>
+            <param name="out" value="psl -noHead" />
+            <param name="maxIntron" value="" />
+            <output name="output" value="dbia3/dbia3.sorted.psl" sort="true"/>
+        </test>
+        <!-- test on the database masked by repeat masker -->
+        <test>
             <param name="reference_source_selector" value="history" />
             <param name="database" value="dbia3/dbia3_masked.2bit" />
             <param name="query" value="dbia3/dmel-transcript.fa" />
-            <param name="database_type" value="dnax" />    
-            <param name="query_type" value="rnax" />     
-            <param name="noHead" value="yes" />
+            <param name="database_type" value="dnax" />
+            <param name="query_type" value="rnax" />
+            <param name="oneOff" value="false" />
+            <param name="minScore" value="30" />
+            <param name="maxGap" value="2" />
+            <param name="trimT" value="false" />
+            <param name="noTrimA" value="false" />
+            <param name="fine" value="false" />
+            <param name="maxIntron" value="750000" />
+            <param name="extendThroughN" value="false" />
             <param name="mask" value="file.out" />
             <param name="mask_file" value="dbia3/dbia3_RM.out" />
-            <output name="output_sorted" value="dbia3/dbia3_masked.sorted.psl" />
-      </test>
-
-  </tests> 
-  <help>
+            <param name="out" value="psl -noHead" />
+            <output name="output" value="dbia3/dbia3_masked.sorted.psl"/>
+        </test>
+    </tests>
+    <help>
         <![CDATA[
 BLAT
 ====
-BLAT is a bioinformatics software a tool which performs rapid mRNA/DNA and cross-species protein alignments. 
+BLAT is a bioinformatics software a tool which performs rapid mRNA/DNA and cross-species protein alignments.
 
-blat (version: v340)- Standalone blat sequence search command line tool. 
+blat (version: v36)- Standalone blat sequence search command line tool.
 -------------------------------------------------------------------------
 
 usage:
@@ -139,25 +203,20 @@
    -ooc=11.ooc tells the program to load over-occurring 11-mers from
    an external file.  This will increase the speed
    by a factor of 40 in many cases, but is not required.
-   output.psl is the name of the output file.  
+   output.psl is the name of the output file.
 
 documentation:
 ++++++++++++++
 
-See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) 
+See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html)
 
 Source code:
 ++++++++++++
 
 http://hgdownload.cse.ucsc.edu/admin/exe/
 
-]]></help>  
-<citations>
-      <citation type="doi">10.1101/gr.229202</citation>
-</citations> 
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1101/gr.229202</citation>
+    </citations>
 </tool>
-             
-            
-
-               
-    
\ No newline at end of file