changeset 0:6095db402811 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme_chip commit 9068afc3812495aaf88c9a2f5a224c634d634742
author iuc
date Fri, 20 Apr 2018 09:03:44 -0400
parents
children 091a9d638d78
files get_meme_motif_databases.py macros.xml meme_chip.xml meme_motif_databases.loc.sample test-data/input1.fasta test-data/output1.html tool_data_table_conf.xml.sample
diffstat 7 files changed, 412 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_meme_motif_databases.py	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,11 @@
+import os
+
+
+def get_meme_motif_database_options(file_path):
+    options = []
+    if not os.path.isdir(file_path):
+        return options
+    for i, file_name in enumerate(os.listdir(file_path)):
+        full_path = os.path.join(file_path, file_name)
+        options.append((file_name, full_path, i == 0))
+    return options
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,11 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">4.11.2</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.3.23">graphicsmagick</requirement>
+            <requirement type="package" version="4.11.2">meme</requirement>
+        </requirements>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/meme_chip.xml	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,224 @@
+<tool id="meme_chip" name="MEME-ChIP" version="4.11.2">
+    <description>- motif discovery, enrichment analysis and clustering on large nucleotide datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <code file="get_meme_motif_databases.py" />
+    <command detect_errors="exit_code"><![CDATA[
+#import os
+#set primary_output = $os.path.join($output.files_path, "index.html")
+meme-chip '$input'
+-noecho
+#if $control:
+    -neg '$control'
+#end if
+$sequence_alphabet
+-o '$output.files_path'
+#if str($options_type_cond.options_type)=='advanced':
+    ## FIXME: CentriMo cannot be run,  See the comments in the input section.
+    ## #set run_centrimo = $options_type_cond.run_centrimo_cond.run_centrimo
+    ## #if str($run_centrimo) == "yes":
+    ##     -db $options_type_cond.run_centrimo_cond.meme_motif_databases.fields.path
+    ##     #if $options_type_cond.run_centrimo_cond.centrimo_local:
+    ##         -centrimo-local
+    ##     #end if
+    ##     #if $options_type_cond.run_centrimo_cond.centrimo_score:
+    ##         -centrimo-score $options_type_cond.run_centrimo_cond.centrimo_score
+    ##     #end if
+    ##     #if $options_type_cond.run_centrimo_cond.centrimo_maxreg:
+    ##         -centrimo-maxreg $options_type_cond.run_centrimo_cond.centrimo_maxreg
+    ##     #end if
+    ##     #if $options_type_cond.run_centrimo_cond.centrimo_ethresh:
+    ##         -centrimo-ethresh $options_type_cond.run_centrimo_cond.centrimo_ethresh
+    ##     #end if
+    ##     #if $options_type_cond.run_centrimo_cond.centrimo_noseq:
+    ##         -centrimo-noseq
+    ##     #end if
+    ##     #if $options_type_cond.run_centrimo_cond.centrimo_flip:
+    ##         -centrimo-flip
+    ##     #end if
+    ## #end if
+    $options_type_cond.search_given_strand
+    -order $options_type_cond.background_model_order
+    #if str($options_type_cond.subsampling_cond.subsampling) == "no":
+        -norand
+        #if $options_type_cond.subsampling_cond.subsampling.seed:
+            -seed $options_type_cond.subsampling_cond.subsampling.seed
+        #end if
+    #end if
+    #if $options_type_cond.nmeme:
+        -nmeme $options_type_cond.nmeme
+    #end if
+    #if $options_type_cond.ccut:
+        -ccut $options_type_cond.ccut
+    #end if
+    -group-thresh $options_type_cond.group_threash
+    #if str($options_type_cond.group_weak):
+        -group-weak $options_type_cond.group_weak
+    #end if
+    -filter-thresh $options_type_cond.filter_thresh
+    $options_type_cond.old_clustering
+    -meme-mod $options_type_cond.meme_mod
+    #if $options_type_cond.meme_minw:
+        -meme-minw $options_type_cond.meme_minw
+    #end if
+    #if $options_type_cond.meme_maxw:
+        -meme-maxw $options_type_cond.meme_maxw
+    #end if
+    #if $options_type_cond.meme_nmotifs:
+        -meme-nmotifs $options_type_cond.meme_nmotifs
+    #end if
+    #if $options_type_cond.meme_minsites:
+        -meme-minsites $options_type_cond.meme_minsites
+    #end if
+    #if $options_type_cond.meme_maxsites:
+        -meme-maxsites $options_type_cond.meme_maxsites
+    #end if
+    $options_type_cond.meme_pal
+    -dreme-e $options_type_cond.dreme_e
+    -dreme-m $options_type_cond.dreme_m
+    -spamo-skip
+    -fimo-skip
+#end if
+&& rm '$output'
+&& ln -s $primary_output '$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Primary sequences" help="Nucleotide sequences must have equal length"/>
+        <param name="control" type="data" format="fasta" optional="true" label="Control sequences" help="If no selection, positive sequences in the input are shuffled to create the negative set"/>
+        <param name="sequence_alphabet" type="select" label="Sequence alphabet">
+            <option value="-dna" selected="true">DNA</option>
+            <option value="-rna">RNA</option>
+        </param>
+        <conditional name="options_type_cond">
+            <param name="options_type" type="select" label="Options Configuration">
+                <option value="basic" selected="true">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic"/>
+            <when value="advanced">
+                <!--
+                FIXME: CentriMo cannot be run since the tool form cannot populate the mem_motif_database select list below.
+                <conditional name="run_centrimo_cond">
+                    <param name="run_centrimo" type="select" label="Run TOMTOM and CentriMo?">
+                        <option value="yes" selected="true">Yes</option>
+                        <option value="no">No</option>
+                    </param>
+                    <when value="yes">
+
+                        We have 2 dynamic select lists here.  The first select list (meme_motif_database_dir) is populated from the meme_motif_databases
+                        data table.  The second select list (meme_motif_database) is dynamically re-rendered whenever the selection in the meme_motif_database_dir
+                        select list is changed.  This composition used to work (see Examples->Dynamic Options section of
+                        https://docs.galaxyproject.org/en/latest/dev/schema.html) but no longer does.  We'll have to figure out what is broken in
+                        the dynamic options code in ~/parameters/basic.py in order to uncomment this block.
+
+                        <param name="meme_motif_database_dir" type="select" label="Select the motifs (DNA)" refresh_on_change="True">
+                            <options from_data_table="meme_motif_databases">
+                                <filter type="sort_by" column="1"/>
+                                <validator type="no_options" message="No MEME motif databases are available for the selected input"/>
+                            </options>
+                        </param>
+                        <param name="meme_motif_database" type="select" label="MEME motif database" dynamic_options="get_meme_motif_database_options(file_path=meme_motif_database_dir)"/>
+                        <param name="centrimo_local" type="boolean" truevalue="true" falsevalue="" checked="False" label="Compute enrichment of all regions"/>
+                        <param name="centrimo_score" type="integer" optional="true" value="0" min="0" label="Minimum allowed CentriMo match score"/>
+                        <param name="centrimo_maxreg" type="integer" optional="true" value="0" min="0" label="Maximum CentriMo region size to be considered"/>
+                        <param name="centrimo_ethresh" type="integer" optional="true" value="0" min="0" label="CentriMo E-value threshold for reporting" />
+                        <param name="centrimo_noseq" type="boolean" truevalue="true" falsevalue="" checked="False" label="Store CentriMo sequence IDs in the output"/>
+                        <param name="centrimo_flip" type="boolean" truevalue="true" falsevalue="" checked="False" label="Reflect CentriMo matches on reverse strand around center"/>
+                    </when>
+                    <when value="no"/>
+                </conditional>
+                -->
+                <param name="background_model_order" type="select" label="Select the order of the Markov background model">
+                    <option value="0">0-order model of sequences</option>
+                    <option value="1" selected="True">1st order model of sequences</option>
+                    <option value="2">2nd order model of sequences</option>
+                    <option value="3">3rd order model of sequences</option>
+                    <option value="4">4th order model of sequences</option>
+                </param>
+                <param name="nmeme" type="integer" optional="true" value="" min="1" label="Limit of sequences to pass to MEME"/>
+                <conditional name="subsampling_cond">
+                    <param name="subsampling" type="select" label="Should subsampling be random?" help="Select 'No' if your input sequences are sorted in order of confidence (best to worst)">
+                        <option value="yes" selected="true">Yes</option>
+                        <option value="no">No</option>
+                    </param>
+                    <when value="yes">
+                        <param name="seed" type="integer" optional="true" value="" min="1" label="Seed for the randomized selection of sequences"/>
+                    </when>
+                    <when value="no"/>
+                </conditional>
+                <param name="ccut" type="integer" optional="true" value="100" min="0" label="maximum size of a sequence before it is cut down to a centered section" help="Zero value indicates the sequences should not be cut down"/>
+                <param name="group_threash" type="float" value="0.05" min="0" label="Primary threshold for clustering motifs" />
+                <param name="group_weak" type="float" optional="true" value="0" min="0" label="Secondary threshold for clustering motifs" help="Zero value results in 2*primary threshold"/>
+                <param name="filter_thresh" type="float" value="0.05" min="0" label="E-value threshold for including motifs"/>
+                <param name="search_given_strand" type="boolean" truevalue="-norc" falsevalue="" checked="False" label="Search given strand only"/>
+                <param argument="-old_clustering" type="boolean" truevalue="-old_clustering" falsevalue="" checked="False" label="Pick cluster seed motifs based only on significance"/>
+                <param name="meme_mod" type="select" label="What is the expected motif site distribution?">
+                    <option value="oops" selected="True">One occurance per sequence</option>
+                    <option value="zoops">Zero or one occurances per sequence</option>
+                    <option value="anr">Any number of repititions</option>
+                </param>
+                <param name="meme_minw" type="integer" optional="true" value="0" min="0" label="Minimum motif width"/>
+                <param name="meme_maxw" type="integer" optional="true" value="0" min="0" label="Maximum motif width"/>
+                <param name="meme_nmotifs" type="integer" optional="true" value="0" min="0" label="Maximum number of motifs to find"/>
+                <param name="meme_minsites" type="integer" optional="true" value="0" min="0" label="Minimum number of sites per motif"/>
+                <param name="meme_maxsites" type="integer" optional="true" value="0" label="Maximum number of sites per motif"/>
+                <param argument="-meme_pal" type="boolean" truevalue="-meme-pal" falsevalue="" checked="False" label="Look for palindromes only"/>
+                <param name="dreme_e" type="float" value="0.05" min="0" label="Stop DREME searching after reaching this E-value threshold"/>
+                <param name="dreme_m" type="integer" value="10" min="1" label="Stop DREME searching after finding this many motifs" />
+            </when>
+        </conditional>
+        <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
+            <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="html" label="${tool.name} (html) on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input1.fasta" ftype="fasta"/>
+            <param name="non_commercial_use" value="True"/>
+            <output name="output" file="output1.html" ftype="html" compare="contains"/>
+        </test>
+        <test>
+            <param name="input" value="input1.fasta" ftype="fasta"/>
+            <param name="sequence_alphabet" value="-rna"/>
+            <param name="options_type" value="advanced"/>
+            <param name="background_model_order" value="0"/>
+            <param name="non_commercial_use" value="True"/>
+            <output name="output" file="output1.html" ftype="html" compare="contains"/>
+        </test>
+    </tests>
+    <help>
+.. class:: warningmark
+
+**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted.
+Before using, be sure to review, agree, and comply with the license.**
+
+MWMW-ChIP perform motif discovery, motif enrichment analysis and clustering on large nucleotide datasets.
+
+If you want to specify sequence weights, you must include them at the top of your input FASTA file.
+
+MEME discovers novel, ungapped motifs (recurring, fixed-length patterns) in your sequences (sample output from sequences).
+MEME splits variable-length patterns into two or more separate motifs.  A motif is a sequence pattern that occurs repeatedly
+in a group of related sequences.  MEME represents motifs as position-dependent letter-probability matrices which describe the
+probability of each possible letter at each position in the pattern.  Individual MEME motifs do not contain gaps.  Patterns
+with variable-length gaps are split by MEME into two or more separate motifs.  MEME takes as input a group of sequences and
+outputs as many motifs as requested.  MEME uses statistical modeling techniques to automatically choose the best width, number
+of occurrences, and description for each motif.
+
+.. class:: infomark
+
+For detailed information on MEME, click here_, or view the license_.
+
+.. _here: http://meme-suite.org/doc/meme.html?man_type=web
+.. _license: http://meme-suite.org/doc/copyright.html?man_type=web
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btr189</citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/meme_motif_databases.loc.sample	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,7 @@
+# This file has the format (white space characters are TAB characters):
+#
+#<value>	<name>	<path>	<description>
+#
+#So, meme_motif_databases.loc could look something like this:
+#
+#2017_12	2017_12	/meme_motif_databases/2017_12	December 2017
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input1.fasta	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTCCACTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT
+>chr21_31744582_31744632_-
+CCCAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTCAGAATCTT
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC
+>chr21_46086869_46086919_-
+GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output1.html	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,86 @@
+<!doctype html>
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>MEME ChIP</title>
+  </head>
+  <body onload="page_loaded()" onpageshow="page_shown(event)" 
+    onresize="page_resize()" onscroll="delayed_process_draw_tasks()">
+    <div class="prog_logo big">
+      <h1>MEME-ChIP</h1>
+      <h2>Motif Analysis of Large Nucleotide Datasets</h2>
+    </div>
+    <p>
+      If you use MEME-ChIP in your research, please cite the following paper:<br />
+    </p>
+    <!-- navigation -->
+    <div class="pad2">
+      <a class="jump" href="#data_sec">Motifs</a>
+      &nbsp;&nbsp;|&nbsp;&nbsp;
+      <a class="jump" href="#programs_sec">Programs</a>
+      &nbsp;&nbsp;|&nbsp;&nbsp;
+      <a class="jump" href="#input_sec">Input Files</a>
+      &nbsp;&nbsp;|&nbsp;&nbsp;
+      <a class="jump" href="#info_sec">Program information</a>
+    </div>
+    <!-- alert the user when their browser is not up to the task -->
+    <noscript><h1 style="color:red">Javascript is required to view these results!</h1></noscript>
+    <h1 id="html5_warning" style="color:red; display:none;">Your browser does not support canvas!</h1>
+    <script>if (!window.HTMLCanvasElement) $("html5_warning").style.display = "block";</script>
+    <!-- write out the job description -->
+    <span id="ins_desc"></span>
+    <script>make_description($("ins_desc"), data["description"]);</script>
+    <!-- write out clustered motifs -->
+    <h2 class="mainh pad2" id="data_sec">Motifs</h2>
+    <div class="box">
+      <p>The significant motifs 
+      (E-value &le; <span id="ins_filter_thresh"></span>)
+      found by the programs MEME, DREME and CentriMo; 
+      clustered by similarity and ordered by E-value.</p>
+      <script>$("ins_filter_thresh").innerHTML = data["filter_thresh"]; </script>
+      <div class="motifbox">
+        <span class="action" onclick="show_all(true)">Expand All Clusters</span>
+        <span class="action" onclick="show_all(false)">Collapse All Clusters</span>
+      </div>
+      <div id="logos"></div>
+      <script>make_clustered_motifs($("logos"));</script>
+    </div>
+    <!-- write out a list of all programs run -->
+    <h2 class="mainh pad2" id="programs_sec">Programs</h2>
+    <div class="box" id="program_listing"></div>
+    <script>make_program_listing($("program_listing"));</script>
+    <!-- write out input files -->
+    <h2 id="input_sec" class="mainh pad2">Input Files</h2>
+    <div id="input_files" class="box">
+      <div id="sequence_db"></div>
+      <script>make_sequence_db_listing('sequence_db', "Primary Sequences");</script>
+      <div id="neg_sequence_db"></div>
+      <script>make_sequence_db_listing('neg_sequence_db', "Control Sequences");</script>
+      <h4 id="motif_dbs_header">Motifs</h4>
+      <div id="motif_dbs"></div>
+      <script>make_motif_db_listing($("motif_dbs"));</script>
+    </div>
+    <!-- list information on this program -->
+    <div id="info_sec" class="bar">
+      <div class="subsection">
+        <h5 id="version">MEME-ChIP version</h5>
+        <span id="ins_version"></span> 
+        (Release date: <span id="ins_release"></span>)<br>
+      </div>
+      <script>
+        $("ins_version").innerHTML = data["version"];
+        $("ins_release").innerHTML = data["release"];
+      </script>
+      <div class="subsection">
+        <h5 id="reference">Reference</h5>
+        <div class="subsection">
+          <h5 id="command">Command line summary</h5>
+          <textarea id="cmd" rows="5" style="width:100%;" readonly="readonly">
+          </textarea>
+          <script>$("cmd").value = data["cmd"].join(" ");</script>
+        </div>
+      </div>
+    </div>
+    <div id="scrollpad"></div>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Apr 20 09:03:44 2018 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of meme_motif_databases -->
+    <table name="meme_motif_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/meme_motif_databases.loc" />
+    </table>
+</tables>