diff fimo.xml @ 13:4eb02864e5df draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit e2cf796f991cbe8c96e0cc5a0056b7255ac3ad6b
author iuc
date Thu, 17 May 2018 14:11:00 -0400
parents c470b36b592d
children c1a9a297ba8b
line wrap: on
line diff
--- a/fimo.xml	Wed Apr 25 12:12:47 2018 -0400
+++ b/fimo.xml	Thu May 17 14:11:00 2018 -0400
@@ -1,59 +1,59 @@
 <tool id="meme_fimo" name="FIMO" version="@WRAPPER_VERSION@.0">
-    <description>- Scan a set of sequences for motifs.</description>
+    <description>- Scan a set of sequences for motifs</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
-mkdir -p output &&
-python '$__tool_directory__/fimo_wrapper.py'
---input_motifs '${input_motifs}'
-#if str($fasta_type.fasta_type_selector) == 'history':
-    --input_fasta '${fasta_type.input_database}'
-#else:
-    --input_fasta '${fasta_type.input_database.fields.path}'
-#end if
---options_type $options_type.options_type_selector
-#if str($options_type.options_type_selector) == 'advanced':
-    --alpha '${options_type.alpha}'
-    #if str($options_type.bgfile_type.bgfile_type_selector) == 'motif_file':
-        --bgfile 'motif-file'
-    #elif str($options_type.bgfile_type.bgfile_type_selector) == 'bgfile':
-        --bgfile '${options_type.bgfile_type.bgfile}'
-    #end if
-    ${options_type.max_strand}
-    --max_stored_scores '${options_type.max_stored_scores}'
-    #if str($options_type.motifs_cond.motifs_selector) == 'no':
-        #for $motif in $options_type.motifs:
-            --motif '${motif.motif}'
-        #end for
+fimo
+    -o ./out/
+    $scanrc
+    #if str($options_type.options_type_selector) == 'advanced':
+        --thresh $options_type.thresh
+        $options_type.qv_thresh
+        #if str($options_type.bgfile_type.bgfile_type_selector) == 'motif_file':
+            --bgfile --motif--
+        #elif str($options_type.bgfile_type.bgfile_type_selector) == 'bgfile':
+            --bgfile '$options_type.bgfile_type.bgfile'
+        #elif str($options_type.bgfile_type.bgfile_type_selector) == 'uniform_distr':
+            --bgfile --uniform--
+        #end if
+        $options_type.max_strand
+        --max-stored-scores $options_type.max_stored_scores
+        ## Restrict the scan to the listed motif IDs: one --motif flag per repeat entry.
+        #if str($options_type.motifs_cond.motif_selector) == 'yes':
+            #for $motif in $options_type.motifs_cond.motifs:
+                --motif '$motif.motif'
+            #end for
+        #end if
+        ## The two boolean switches below expand to their flag or to an empty string.
+        --motif-pseudo $options_type.motif_pseudo
+        $options_type.no_qvalue
+        $options_type.parse_genomic_coords
+        ## The alpha value is only passed together with a position-specific priors file.
+        #if str($options_type.psp_cond.psp_selector) == 'yes':
+            --psp '$options_type.psp_cond.input_psp'
+            --alpha $options_type.psp_cond.alpha
+        #end if
+        ## Optional binned distribution of priors.
+        #if str($options_type.prior_dist_cond.prior_dist_selector) == 'yes':
+            --prior-dist '$options_type.prior_dist_cond.input_prior_dist'
+        #end if
     #end if
-    --output_separate_motifs ${options_type.output_separate_motifs}
-    --motif_pseudo '${options_type.motif_pseudo}'
-    ${options_type.no_qvalue}
-    ${options_type.norc}
-    #if str($options_type.parse_genomic_coord_cond.parse_genomic_coord) == 'yes':
-        --parse_genomic_coord 'yes'
-        --remove_duplicate_coords ${options_type.parse_genomic_coord_cond.remove_duplicate_coords}
-    #end if
-    #if str($options_type.psp_cond.psp_selector) == 'yes':
-        --input_psp '${input_psp}'
+    '$input_motifs'
+    #if str($fasta_type.fasta_type_selector) == 'history':
+        '${fasta_type.input_database}'
+    #else:
+        '${fasta_type.input_database.fields.path}'
     #end if
-    #if str($options_type.prior_dist_cond.prior_dist_selector) == 'yes':
-        --input_prior_dist '${input_prior_dist}'
-    #end if
-    ${options_type.qv_thresh}
-    --thresh ${options_type.thresh}
-#end if
---output_path '${html_outfile.files_path}'
---html_output '${html_outfile}'
---interval_output '${interval_outfile}'
---txt_output '${txt_outfile}'
---xml_output '${xml_outfile}'
---gff_output '${gff_outfile}'
+&& mv ./out/fimo.html '${html_outfile}'
+&& mv ./out/fimo.txt '${txt_outfile}'
+&& mv ./out/fimo.xml '${xml_outfile}'
+&& mv ./out/fimo.gff '${gff_outfile}'
     ]]></command>
     <inputs>
-        <param name="input_motifs" type="data" format="memexml" label="'MEME output' formatted file"/>
+        <param name="input_motifs" type="data" format="memexml" label="DREME or MEME output XML file"
+               help="DREME or MEME output XML file containing found motifs"/>
         <conditional name="fasta_type">
             <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                 <option value="cached">Locally Cached sequences</option>
@@ -68,60 +68,63 @@
                 <param format="fasta" name="input_database" type="data" label="Sequences"/>
             </when>
         </conditional>
+        <param name="scanrc" type="boolean" checked="False" truevalue="" falsevalue="--norc"
+               label="Check reverse complement strand"
+               help="Search for motifs also on reverse complement strand"/>
         <conditional name="options_type">
             <param name="options_type_selector" type="select" label="Options configuration">
                 <option value="basic" selected="true">Basic</option>
                 <option value="advanced">Advanced</option>
             </param>
-            <when value="basic" />
+            <when value="basic"/>
             <when value="advanced">
-                <param name="alpha" type="float" value="1.0" min="0" max="1.0" label="Alpha parameter for calculating position specific priors" help="Represents the fraction of all transcription factor binding sites that are binding sites for the TF of interest (must be between 0 and 1)."/>
+                <param name="thresh" type="float" value="1e-4" argument="--thresh"
+                       label="Output threshold for displaying search results"
+                       help="Only output results with a p-value less than the given threshold. To apply set threshold to q-values rather than p-values, set --qv-thresh."/>
+                <param name="qv_thresh" label="Apply output threshold to q-values?" argument="--qv-thresh"
+                       type="boolean" truevalue="--qv-thresh" falsevalue="" checked="False"
+                       help="Default: apply set threshold on p-values"/>
                 <conditional name="bgfile_type">
-                    <param name="bgfile_type_selector" type="select" label="Background file type">
-                        <option value="default" selected="true">Use frequencies embedded in the application from the non-redundant database</option>
-                        <option value="motif_file">Use frequencies from motif file</option>
-                        <option value="bgfile">Use frequencies from background file</option>
+                    <param name="bgfile_type_selector" type="select" label="Background model selection">
+                        <option value="default" selected="true">Use embedded letter frequencies from non-redundant DNA/protein database</option>
+                        <option value="motif_file">Use 0-order letter frequencies from motif file</option>
+                        <option value="bgfile">Use 0-order letter frequencies from background file</option>
+                        <option value="uniform_distr">Use uniform letter frequencies</option>
                     </param>
                     <when value="motif_file" />
                     <when value="default" />
+                    <when value="uniform_distr" />
                     <when value="bgfile">
-                        <param name="bgfile" type="data" format="txt" optional="True" label="Background Model" help="File must be in MEME background file format."/>
+                        <param name="bgfile" type="data" format="txt" optional="True" label="Background model file" help="File must be in Markov background model format (see MEME suite tool fasta-get-markov for details)"/>
                     </when>
                 </conditional>
-                <param name="max_strand" label="If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score" type="boolean" truevalue="--max_strand" falsevalue="" checked="False" help="If the scores are tied, the matching strand is chosen at random.  Leave unchecked to report both matches."/>
-                <param name="max_stored_scores" type="integer" value="100000" label="Maximum number of scores that will be stored" />
+                <param name="max_strand" type="boolean" checked="False" truevalue="--max-strand" falsevalue=""
+                       argument="--max-strand" label="Report best match in case of overlapping matches on both strands?"
+                       help="If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score. If the scores are tied, the matching strand is chosen at random (default: report both matches)."/>
+                <param name="max_stored_scores" type="integer" value="100000"
+                       argument="--max-stored-scores" label="Maximum number of scores that will be stored"
+                       help="Keeping a complete list of scores may exceed available memory. Once the number of stored scores reaches the maximum allowed, the least significant 50% of scores will be dropped. In this case, the list of reported motifs may be incomplete and the q-value calculation will be approximate."/>
                 <conditional name="motifs_cond">
-                    <param name="motifs_selector" type="select" label="Use all motifs in input?">
-                        <option value="yes" selected="true">Yes</option>
-                        <option value="no">No</option>
+                    <param name="motif_selector" type="select" label="Specify single input motifs for scanning?">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
                     </param>
-                    <when value="yes"/>
-                    <when value="no">
-                        <repeat name="motifs" title="Limit to specified motif">
-                            <param name="motif" type="text" value="" label="Specify motif by id" />
+                    <when value="no"/>
+                    <when value="yes">
+                        <repeat name="motifs" title="Supply input motif ID">
+                            <param name="motif" type="text" value="" label="Motif ID" />
                         </repeat>
                     </when>
                 </conditional>
-                <param name="output_separate_motifs" type="select" label="Output a dataset per motif?" help="Output a collection consisting of a separate dataset for each motif in the input">
-                    <option value="no" selected="true">No</option>
-                    <option value="yes">Yes</option>
-                </param>
-                <param name="motif_pseudo" type="float" value="0.1" label="Pseudocount to add to counts in motif matrix" help="A pseudocount to be added to each count in the motif matrix, after first multiplying by the corresponding background frequency"/>
-                <param name="no_qvalue" label="Do not compute a q-value for each p-value" type="boolean" truevalue="--no_qvalue" falsevalue="" checked="True" help="The q-value calculation is that of Benjamini and Hochberg (1995)."/>
-                <param name="norc" label="Do not score the reverse complement DNA strand" type="boolean" truevalue="--norc" falsevalue="" checked="False" />
-                <conditional name="parse_genomic_coord_cond">
-                    <param name="parse_genomic_coord" label="Check each sequence header for UCSC style genomic coordinates" type="select">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="yes">
-                        <param name="remove_duplicate_coords" type="select" label="Remove duplicate entries in unique GFF coordinates?" help="Remove duplicate entries as defined by the unique GFF coordinates">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    </when>
-                    <when value="no"/>
-                </conditional>
+                <param name="motif_pseudo" argument="--motif-pseudo" type="float" value="0.1"
+                       label="Pseudocount to add to counts in motif matrix"
+                       help="A pseudocount to be added to each count in the motif matrix, after first multiplying by the corresponding background frequency"/>
+                <param name="no_qvalue" argument="--no-qvalue" type="boolean" checked="False"
+                       truevalue="--no-qvalue" falsevalue="" label="Disable q-value calculation?"
+                       help="The q-value calculation is that of Benjamini and Hochberg (1995) (default: calculate q-value for each p-value)"/>
+                <param name="parse_genomic_coords" label="Check each sequence header for UCSC-style genomic coordinates?"
+                       type="boolean" truevalue="--parse-genomic-coord" falsevalue="" checked="False" argument="--parse-genomic-coord"
+                       help="If genomic coordinates are found they will be used as the coordinates in the output."/>
                 <conditional name="psp_cond">
                     <param name="psp_selector" type="select" label="Use position-specific priors?">
                         <option value="no" selected="true">No</option>
@@ -129,7 +132,12 @@
                     </param>
                     <when value="no"/>
                     <when value="yes">
-                        <param name="input_psp" type="data" format="txt" label="Select dataset containing position specific priors" help="Format must be meme psp or wiggle."/>
+                        <param name="input_psp" type="data" format="txt" argument="--psp"
+                               label="Select file containing position-specific priors" 
+                               help="File should be in MEME PSP format or wiggle format (can be generated by MEME suite tool create-priors)"/>
+                        <param name="alpha" type="float" value="1.0" min="0" max="1.0" argument="--alpha"
+                               label="Alpha parameter for calculating position-specific priors"
+                               help="Represents the fraction of all transcription factor binding sites that are binding sites for the TF of interest (must be between 0 and 1)"/>
                     </when>
                 </conditional>
                 <conditional name="prior_dist_cond">
@@ -139,19 +147,38 @@
                     </param>
                     <when value="no"/>
                     <when value="yes">
-                        <param name="input_prior_dist" type="data" format="txt" label="Select dataset containing binned distribution of priors"/>
+                        <param name="input_prior_dist" type="data" format="txt" argument="--prior-dist"
+                               label="Select dataset containing binned distribution of priors"
+                               help="This file can be generated using the MEME suite tool create-priors"/>
                     </when>
                 </conditional>
-                <param name="qv_thresh" label="Use q-values for the output threshold" type="boolean" truevalue="--qv_thresh" falsevalue="" checked="False" help="Leave unchecked to use p-values for the output threshold."/>
-                <param name="thresh" type="float" value="1e-4" label="Output threshold for displaying search results" help="Only search results with a p-value less than the threshold will be output. The threshold can be set to use q-values rather than p-values via the option above."/>
             </when>
         </conditional>
         <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
             <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
         </param>
+        <section name="output_options" title="Additional output options">
+            <param name="html_outfile" type="boolean" checked="false" label="Output HTML file" help="FIMO HTML output file"/>
+            <param name="xml_outfile" type="boolean" checked="false" label="Output XML file" help="FIMO XML output file"/>
+            <param name="gff_outfile" type="boolean" checked="false" label="Output GFF file" help="FIMO GFF output file"/>
+        </section>
     </inputs>
     <outputs>
+        <data format="txt" name="txt_outfile" label="${tool.name} on ${on_string} (text)">
+            <actions>
+                <conditional name="fasta_type.fasta_type_selector">
+                    <when value="cached">
+                        <action type="metadata" name="dbkey">
+                            <option type="from_data_table" name="all_fasta" column="1" offset="0">
+                                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
+                            </option>
+                        </action>
+                    </when>
+                </conditional>
+            </actions>
+        </data>
         <data format="html" name="html_outfile" label="${tool.name} on ${on_string} (html)">
+            <filter>(output_options['html_outfile'] is True)</filter>
             <actions>
                 <conditional name="fasta_type.fasta_type_selector">
                     <when value="cached">
@@ -165,7 +192,8 @@
                 </conditional>
             </actions>
         </data>
-        <data format="tabular" name="txt_outfile" label="${tool.name} on ${on_string} (text)">
+        <data format="txt" name="gff_outfile" label="${tool.name} on ${on_string} (gff)">
+            <filter>(output_options['gff_outfile'] is True)</filter>
             <actions>
                 <conditional name="fasta_type.fasta_type_selector">
                     <when value="cached">
@@ -178,38 +206,8 @@
                 </conditional>
             </actions>
         </data>
-        <data format="tabular" name="gff_outfile" label="${tool.name} on ${on_string} (almost-gff)">
-            <filter>options_type['options_type_selector'] == 'basic' or (options_type['options_type_selector'] == 'advanced' and options_type['output_separate_motifs'] == 'no')</filter>
-            <actions>
-                <conditional name="fasta_type.fasta_type_selector">
-                    <when value="cached">
-                        <action type="metadata" name="dbkey">
-                            <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
-                            </option>
-                        </action>
-                    </when>
-                </conditional>
-            </actions>
-        </data>
-        <collection name="motifs" type="list" label="Motifs: ${tool.name} on ${on_string}">
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="output" ext="gff" visible="false" />
-            <filter>options_type['options_type_selector'] == 'advanced' and options_type['output_separate_motifs'] == 'yes'</filter>
-        </collection>
-        <data format="cisml" name="xml_outfile" label="${tool.name} on ${on_string} (xml)">
-            <actions>
-                <conditional name="fasta_type.fasta_type_selector">
-                    <when value="cached">
-                        <action type="metadata" name="dbkey">
-                            <option type="from_data_table" name="all_fasta" column="1" offset="0">
-                                <filter type="param_value" ref="fasta_type.input_database" column="0"/>
-                            </option>
-                        </action>
-                    </when>
-                </conditional>
-            </actions>
-        </data>
-        <data format="interval" name="interval_outfile" label="${tool.name} on ${on_string} (interval)">
+        <data format="memexml" name="xml_outfile" label="${tool.name} on ${on_string} (xml)">
+            <filter>(output_options['xml_outfile'] is True)</filter>
             <actions>
                 <conditional name="fasta_type.fasta_type_selector">
                     <when value="cached">
@@ -224,46 +222,57 @@
         </data>
     </outputs>
     <tests>
-        <test>
-            <param name="input_motifs" value="meme_output_xml_1.xml" ftype="memexml"/>
+        <test expect_num_outputs="4">
+            <param name="input_motifs" value="meme_fimo_input_1.xml" ftype="memexml"/>
             <param name="fasta_type_selector" value="history"/>
-            <param name="input_database" value="phiX.fasta" ftype="fasta"/>
+            <param name="input_database" value="hsa_chrM.fa" ftype="fasta"/>
+            <param name="scanrc" value="true"/>
             <param name="options_type_selector" value="basic"/>
-            <param name="non_commercial_use" value="True"/>
-            <output name="html_outfile" file="fimo_output_html_1.html" compare="contains"/>
-            <output name="txt_outfile" file="fimo_output_txt_1.txt" compare="contains"/>
-            <output name="gff_outfile" file="fimo_output_almost-gff_1.txt" compare="contains"/>
-            <output name="xml_outfile" file="fimo_output_xml_1.xml" compare="contains"/>
-            <output name="interval_outfile" file="fimo_output_interval_1.txt" compare="contains"/>
-        </test>
-        <test>
-            <param name="input_motifs" value="meme_output_xml_1.xml" ftype="memexml"/>
-            <param name="fasta_type_selector" value="history"/>
-            <param name="input_database" value="phiX.fasta" ftype="fasta"/>
-            <param name="options_type_selector" value="advanced"/>
+            <param name="html_outfile" value="True"/>
+            <param name="xml_outfile" value="True"/>
+            <param name="gff_outfile" value="True"/>
             <param name="non_commercial_use" value="True"/>
-            <output name="html_outfile" file="fimo_output_html_2.html" compare="contains"/>
-            <output name="txt_outfile" file="fimo_output_txt_2.txt" compare="contains"/>
-            <output name="gff_outfile" file="fimo_output_almost-gff_2.txt" compare="contains"/>
-            <output name="xml_outfile" file="fimo_output_xml_2.xml" compare="contains"/>
-            <output name="interval_outfile" file="fimo_output_interval_2.txt" compare="contains"/>
+            <output name="html_outfile" file="fimo_output_test1.html" lines_diff="20"/>
+            <output name="txt_outfile" file="fimo_output_test1.txt"/>
+            <output name="xml_outfile" file="fimo_output_test1.xml" lines_diff="20"/>
+            <output name="gff_outfile" file="fimo_output_test1.gff"/>
         </test>
-        <test>
-            <param name="input_motifs" value="meme_output_xml_1.xml" ftype="memexml"/>
+        <test expect_num_outputs="4">
+            <param name="input_motifs" value="dreme_fimo_input_1.xml" ftype="memexml"/>
             <param name="fasta_type_selector" value="history"/>
-            <param name="input_database" value="phiX.fasta" ftype="fasta"/>
+            <param name="input_database" value="hsa_chrM.fa" ftype="fasta"/>
+            <param name="scanrc" value="true"/>
             <param name="options_type_selector" value="advanced"/>
-            <param name="parse_genomic_coord" value="yes"/>
-            <param name="remove_duplicate_coords" value="yes"/>
-            <param name="output_separate_motifs" value="yes"/>
+            <param name="bgfile_type_selector" value="bgfile"/>
+            <param name="bgfile" value="fimo_background_probs_hsa_chrM.txt"/>
+            <param name="html_outfile" value="True"/>
+            <param name="xml_outfile" value="True"/>
+            <param name="gff_outfile" value="True"/>
             <param name="non_commercial_use" value="True"/>
-            <output name="html_outfile" file="fimo_output_html_2.html" compare="contains"/>
-            <output name="txt_outfile" file="fimo_output_txt_2.txt" compare="contains"/>
-            <output_collection name="motifs" type="list">
-                <element name="MOTIF1.gff" file="motif1.gff" ftype="gff" compare="contains"/>
-            </output_collection>
-            <output name="xml_outfile" file="fimo_output_xml_2.xml" compare="contains"/>
-            <output name="interval_outfile" file="fimo_output_interval_2.txt" compare="contains"/>
+            <output name="html_outfile" file="fimo_output_test2.html" lines_diff="20"/>
+            <output name="txt_outfile" file="fimo_output_test2.txt"/>
+            <output name="xml_outfile" file="fimo_output_test2.xml" lines_diff="20"/>
+            <output name="gff_outfile" file="fimo_output_test2.gff"/>
+        </test>
+        <test expect_num_outputs="3">
+            <param name="input_motifs" value="dreme_fimo_input_1.xml" ftype="memexml"/>
+            <param name="fasta_type_selector" value="history"/>
+            <param name="input_database" value="hsa_chrM.fa" ftype="fasta"/>
+            <param name="scanrc" value="true"/>
+            <param name="options_type_selector" value="advanced"/>
+            <param name="thresh" value="0.01"/>
+            <param name="bgfile_type_selector" value="uniform_distr"/>
+            <param name="motif_selector" value="yes"/>
+            <repeat name="motifs">
+                <param name="motif" value="ACTAAYH"/>
+            </repeat>
+            <param name="html_outfile" value="True"/>
+            <param name="xml_outfile" value="True"/>
+            <param name="gff_outfile" value="False"/>
+            <param name="non_commercial_use" value="True"/>
+            <output name="html_outfile" file="fimo_output_test3.html" lines_diff="20"/>
+            <output name="txt_outfile" file="fimo_output_test3.txt"/>
+            <output name="xml_outfile" file="fimo_output_test3.xml" lines_diff="20"/>
         </test>
     </tests>
     <help>