Mercurial > repos > iuc > meme_chip
changeset 0:6095db402811 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme_chip commit 9068afc3812495aaf88c9a2f5a224c634d634742
author | iuc |
---|---|
date | Fri, 20 Apr 2018 09:03:44 -0400 |
parents | |
children | 091a9d638d78 |
files | get_meme_motif_databases.py macros.xml meme_chip.xml meme_motif_databases.loc.sample test-data/input1.fasta test-data/output1.html tool_data_table_conf.xml.sample |
diffstat | 7 files changed, 412 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Dynamic-options helper for the MEME-ChIP Galaxy tool (get_meme_motif_databases.py)."""
import os


def get_meme_motif_database_options(file_path):
    """Build select-list options for the entries of a motif database directory.

    Args:
        file_path: Path of the directory to list. ``None``, an empty string,
            or a path that is not an existing directory yields no options.

    Returns:
        A list of ``(file_name, full_path, selected)`` tuples, one per
        directory entry, ordered by file name. Only the first tuple has
        ``selected`` set to ``True``.
    """
    # Guard against a missing value before touching the file system:
    # os.path.isdir(None) raises TypeError instead of returning False.
    if not file_path or not os.path.isdir(file_path):
        return []
    # os.listdir() returns entries in arbitrary order; sorting keeps both the
    # option order and the pre-selected (first) entry stable between calls.
    file_names = sorted(os.listdir(file_path))
    return [
        (file_name, os.path.join(file_path, file_name), index == 0)
        for index, file_name in enumerate(file_names)
    ]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Apr 20 09:03:44 2018 -0400 @@ -0,0 +1,11 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">4.11.2</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.3.23">graphicsmagick</requirement> + <requirement type="package" version="4.11.2">meme</requirement> + </requirements> + </xml> +</macros> +
<!-- meme_chip.xml: Galaxy tool wrapper that runs meme-chip and exposes its index.html as the output dataset. -->
<tool id="meme_chip" name="MEME-ChIP" version="4.11.2">
    <description>- motif discovery, enrichment analysis and clustering on large nucleotide datasets</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <code file="get_meme_motif_databases.py" />
    <command detect_errors="exit_code"><![CDATA[
#import os
## meme-chip writes its report into the output's extra files directory; index.html is the entry page.
#set primary_output = $os.path.join($output.files_path, "index.html")
meme-chip '$input'
-noecho
#if $control:
    -neg '$control'
#end if
$sequence_alphabet
-o '$output.files_path'
#if str($options_type_cond.options_type)=='advanced':
    ## FIXME: CentriMo cannot be run, See the comments in the input section.
    ## #set run_centrimo = $options_type_cond.run_centrimo_cond.run_centrimo
    ## #if str($run_centrimo) == "yes":
    ##     -db $options_type_cond.run_centrimo_cond.meme_motif_databases.fields.path
    ##     #if $options_type_cond.run_centrimo_cond.centrimo_local:
    ##         -centrimo-local
    ##     #end if
    ##     #if $options_type_cond.run_centrimo_cond.centrimo_score:
    ##         -centrimo-score $options_type_cond.run_centrimo_cond.centrimo_score
    ##     #end if
    ##     #if $options_type_cond.run_centrimo_cond.centrimo_maxreg:
    ##         -centrimo-maxreg $options_type_cond.run_centrimo_cond.centrimo_maxreg
    ##     #end if
    ##     #if $options_type_cond.run_centrimo_cond.centrimo_ethresh:
    ##         -centrimo-ethresh $options_type_cond.run_centrimo_cond.centrimo_ethresh
    ##     #end if
    ##     #if $options_type_cond.run_centrimo_cond.centrimo_noseq:
    ##         -centrimo-noseq
    ##     #end if
    ##     #if $options_type_cond.run_centrimo_cond.centrimo_flip:
    ##         -centrimo-flip
    ##     #end if
    ## #end if
    $options_type_cond.search_given_strand
    -order $options_type_cond.background_model_order
    #if str($options_type_cond.subsampling_cond.subsampling) == "no":
        -norand
    #else:
        ## The seed param is declared in the "yes" branch of subsampling_cond,
        ## so it is only available (and only meaningful) for random subsampling.
        #if str($options_type_cond.subsampling_cond.seed):
            -seed $options_type_cond.subsampling_cond.seed
        #end if
    #end if
    #if $options_type_cond.nmeme:
        -nmeme $options_type_cond.nmeme
    #end if
    ## Compare as a string so that an explicit 0 ("do not cut down") is still passed on.
    #if str($options_type_cond.ccut):
        -ccut $options_type_cond.ccut
    #end if
    -group-thresh $options_type_cond.group_threash
    #if str($options_type_cond.group_weak):
        -group-weak $options_type_cond.group_weak
    #end if
    -filter-thresh $options_type_cond.filter_thresh
    $options_type_cond.old_clustering
    -meme-mod $options_type_cond.meme_mod
    #if $options_type_cond.meme_minw:
        -meme-minw $options_type_cond.meme_minw
    #end if
    #if $options_type_cond.meme_maxw:
        -meme-maxw $options_type_cond.meme_maxw
    #end if
    #if $options_type_cond.meme_nmotifs:
        -meme-nmotifs $options_type_cond.meme_nmotifs
    #end if
    #if $options_type_cond.meme_minsites:
        -meme-minsites $options_type_cond.meme_minsites
    #end if
    #if $options_type_cond.meme_maxsites:
        -meme-maxsites $options_type_cond.meme_maxsites
    #end if
    $options_type_cond.meme_pal
    -dreme-e $options_type_cond.dreme_e
    -dreme-m $options_type_cond.dreme_m
    -spamo-skip
    -fimo-skip
#end if
## Replace the placeholder output dataset with a link to the generated report page.
&& rm '$output'
&& ln -s '$primary_output' '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="fasta" label="Primary sequences" help="Nucleotide sequences must have equal length"/>
        <param name="control" type="data" format="fasta" optional="true" label="Control sequences" help="If no selection, positive sequences in the input are shuffled to create the negative set"/>
        <param name="sequence_alphabet" type="select" label="Sequence alphabet">
            <option value="-dna" selected="true">DNA</option>
            <option value="-rna">RNA</option>
        </param>
        <conditional name="options_type_cond">
            <param name="options_type" type="select" label="Options Configuration">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic"/>
            <when value="advanced">
                <!--
                FIXME: CentriMo cannot be run since the tool form cannot populate the meme_motif_database select list below.
                <conditional name="run_centrimo_cond">
                    <param name="run_centrimo" type="select" label="Run TOMTOM and CentriMo?">
                        <option value="yes" selected="true">Yes</option>
                        <option value="no">No</option>
                    </param>
                    <when value="yes">

                        We have 2 dynamic select lists here. The first select list (meme_motif_database_dir) is populated from the meme_motif_databases
                        data table. The second select list (meme_motif_database) is dynamically re-rendered whenever the selection in the meme_motif_database_dir
                        select list is changed. This composition used to work (see Examples->Dynamic Options section of
                        https://docs.galaxyproject.org/en/latest/dev/schema.html) but no longer does. We'll have to figure out what is broken in
                        the dynamic options code in ~/parameters/basic.py in order to uncomment this block.

                        <param name="meme_motif_database_dir" type="select" label="Select the motifs (DNA)" refresh_on_change="True">
                            <options from_data_table="meme_motif_databases">
                                <filter type="sort_by" column="1"/>
                                <validator type="no_options" message="No MEME motif databases are available for the selected input"/>
                            </options>
                        </param>
                        <param name="meme_motif_database" type="select" label="MEME motif database" dynamic_options="get_meme_motif_database_options(file_path=meme_motif_database_dir)"/>
                        <param name="centrimo_local" type="boolean" truevalue="true" falsevalue="" checked="False" label="Compute enrichment of all regions"/>
                        <param name="centrimo_score" type="integer" optional="true" value="0" min="0" label="Minimum allowed CentriMo match score"/>
                        <param name="centrimo_maxreg" type="integer" optional="true" value="0" min="0" label="Maximum CentriMo region size to be considered"/>
                        <param name="centrimo_ethresh" type="integer" optional="true" value="0" min="0" label="CentriMo E-value threshold for reporting" />
                        <param name="centrimo_noseq" type="boolean" truevalue="true" falsevalue="" checked="False" label="Store CentriMo sequence IDs in the output"/>
                        <param name="centrimo_flip" type="boolean" truevalue="true" falsevalue="" checked="False" label="Reflect CentriMo matches on reverse strand around center"/>
                    </when>
                    <when value="no"/>
                </conditional>
                -->
                <param name="background_model_order" type="select" label="Select the order of the Markov background model">
                    <option value="0">0-order model of sequences</option>
                    <option value="1" selected="True">1st order model of sequences</option>
                    <option value="2">2nd order model of sequences</option>
                    <option value="3">3rd order model of sequences</option>
                    <option value="4">4th order model of sequences</option>
                </param>
                <param name="nmeme" type="integer" optional="true" value="" min="1" label="Limit of sequences to pass to MEME"/>
                <conditional name="subsampling_cond">
                    <param name="subsampling" type="select" label="Should subsampling be random?" help="Select 'No' if your input sequences are sorted in order of confidence (best to worst)">
                        <option value="yes" selected="true">Yes</option>
                        <option value="no">No</option>
                    </param>
                    <when value="yes">
                        <param name="seed" type="integer" optional="true" value="" min="1" label="Seed for the randomized selection of sequences"/>
                    </when>
                    <when value="no"/>
                </conditional>
                <param name="ccut" type="integer" optional="true" value="100" min="0" label="maximum size of a sequence before it is cut down to a centered section" help="Zero value indicates the sequences should not be cut down"/>
                <!-- The misspelled name "group_threash" is kept because it is part of the tool's parameter interface. -->
                <param name="group_threash" type="float" value="0.05" min="0" label="Primary threshold for clustering motifs" />
                <param name="group_weak" type="float" optional="true" value="0" min="0" label="Secondary threshold for clustering motifs" help="Zero value results in 2*primary threshold"/>
                <param name="filter_thresh" type="float" value="0.05" min="0" label="E-value threshold for including motifs"/>
                <param name="search_given_strand" type="boolean" truevalue="-norc" falsevalue="" checked="False" label="Search given strand only"/>
                <!-- NOTE(review): every other multi-word meme-chip flag emitted by this wrapper is hyphenated (e.g. -meme-pal); confirm whether the truevalue below should be -old-clustering. -->
                <param argument="-old_clustering" type="boolean" truevalue="-old_clustering" falsevalue="" checked="False" label="Pick cluster seed motifs based only on significance"/>
                <param name="meme_mod" type="select" label="What is the expected motif site distribution?">
                    <option value="oops" selected="True">One occurrence per sequence</option>
                    <option value="zoops">Zero or one occurrences per sequence</option>
                    <option value="anr">Any number of repetitions</option>
                </param>
                <param name="meme_minw" type="integer" optional="true" value="0" min="0" label="Minimum motif width"/>
                <param name="meme_maxw" type="integer" optional="true" value="0" min="0" label="Maximum motif width"/>
                <param name="meme_nmotifs" type="integer" optional="true" value="0" min="0" label="Maximum number of motifs to find"/>
                <param name="meme_minsites" type="integer" optional="true" value="0" min="0" label="Minimum number of sites per motif"/>
                <param name="meme_maxsites" type="integer" optional="true" value="0" min="0" label="Maximum number of sites per motif"/>
                <param argument="-meme_pal" type="boolean" truevalue="-meme-pal" falsevalue="" checked="False" label="Look for palindromes only"/>
                <param name="dreme_e" type="float" value="0.05" min="0" label="Stop DREME searching after reaching this E-value threshold"/>
                <param name="dreme_m" type="integer" value="10" min="1" label="Stop DREME searching after finding this many motifs" />
            </when>
        </conditional>
        <!-- Not used on the command line; the validator blocks job submission until the box is ticked. -->
        <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
            <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="html" label="${tool.name} (html) on ${on_string}"/>
    </outputs>
    <tests>
        <test>
            <param name="input" value="input1.fasta" ftype="fasta"/>
            <param name="non_commercial_use" value="True"/>
            <output name="output" file="output1.html" ftype="html" compare="contains"/>
        </test>
        <test>
            <param name="input" value="input1.fasta" ftype="fasta"/>
            <param name="sequence_alphabet" value="-rna"/>
            <param name="options_type" value="advanced"/>
            <param name="background_model_order" value="0"/>
            <param name="non_commercial_use" value="True"/>
            <output name="output" file="output1.html" ftype="html" compare="contains"/>
        </test>
    </tests>
    <help>
.. class:: warningmark

**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted.
Before using, be sure to review, agree, and comply with the license.**

MEME-ChIP performs motif discovery, motif enrichment analysis and clustering on large nucleotide datasets.

If you want to specify sequence weights, you must include them at the top of your input FASTA file.

MEME discovers novel, ungapped motifs (recurring, fixed-length patterns) in your sequences (sample output from sequences).
MEME splits variable-length patterns into two or more separate motifs. A motif is a sequence pattern that occurs repeatedly
in a group of related sequences. MEME represents motifs as position-dependent letter-probability matrices which describe the
probability of each possible letter at each position in the pattern. Individual MEME motifs do not contain gaps. Patterns
with variable-length gaps are split by MEME into two or more separate motifs. MEME takes as input a group of sequences and
outputs as many motifs as requested. MEME uses statistical modeling techniques to automatically choose the best width, number
of occurrences, and description for each motif.

.. class:: infomark

For detailed information on MEME, click here_, or view the license_.

.. _here: http://meme-suite.org/doc/meme.html?man_type=web
.. _license: http://meme-suite.org/doc/copyright.html?man_type=web

    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btr189</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/meme_motif_databases.loc.sample Fri Apr 20 09:03:44 2018 -0400 @@ -0,0 +1,7 @@ +# This file has the format (white space characters are TAB characters): +# +#<value> <name> <path> <description> +# +#So, meme_motif_databases.loc could look something like this: +# +#2017_12 2017_12 /meme_motif_databases/2017_12 December 2017
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input1.fasta Fri Apr 20 09:03:44 2018 -0400 @@ -0,0 +1,66 @@ +>chr21_19617074_19617124_+ +AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA +>chr21_26934381_26934431_+ +GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT +>chr21_28217753_28217803_- +CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTCCACTATAAA +>chr21_31710037_31710087_- +AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT +>chr21_31744582_31744632_- +CCCAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA +>chr21_31768316_31768366_+ +AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTCAGAATCTT +>chr21_31914206_31914256_- +TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC +>chr21_31933633_31933683_- +TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT +>chr21_31962741_31962791_- +ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA +>chr21_31964683_31964733_+ +TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC +>chr21_31973364_31973414_+ +aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC +>chr21_31992870_31992920_+ +CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT +>chr21_32185595_32185645_- +TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA +>chr21_32202076_32202126_- +TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT +>chr21_32253899_32253949_- +AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC +>chr21_32410820_32410870_- +TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG +>chr21_36411748_36411798_- +ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt +>chr21_37838750_37838800_- +gatggttttataaggggcctcaccctcggctcagccctcattcttctcct +>chr21_45705687_45705737_+ +CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC +>chr21_45971413_45971463_- +CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca +>chr21_45978668_45978718_- +CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca +>chr21_45993530_45993580_+ +CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC +>chr21_46020421_46020471_+ +GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc +>chr21_46031920_46031970_+ 
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC +>chr21_46046964_46047014_+ +ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca +>chr21_46057197_46057247_+ +ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC +>chr21_46086869_46086919_- +GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC +>chr21_46102103_46102153_- +AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT +>chr21_47517957_47518007_+ +CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG +>chr21_47517957_47518007_+ +CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG +>chr21_47517957_47518007_+ +CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG +>chr21_47575506_47575556_- +TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG +>chr21_47575506_47575556_- +TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1.html Fri Apr 20 09:03:44 2018 -0400 @@ -0,0 +1,86 @@ +<!doctype html> +<html> + <head> + <meta charset="UTF-8"> + <title>MEME ChIP</title> + </head> + <body onload="page_loaded()" onpageshow="page_shown(event)" + onresize="page_resize()" onscroll="delayed_process_draw_tasks()"> + <div class="prog_logo big"> + <h1>MEME-ChIP</h1> + <h2>Motif Analysis of Large Nucleotide Datasets</h2> + </div> + <p> + If you use MEME-ChIP in your research, please cite the following paper:<br /> + </p> + <!-- navigation --> + <div class="pad2"> + <a class="jump" href="#data_sec">Motifs</a> + | + <a class="jump" href="#programs_sec">Programs</a> + | + <a class="jump" href="#input_sec">Input Files</a> + | + <a class="jump" href="#info_sec">Program information</a> + </div> + <!-- alert the user when their browser is not up to the task --> + <noscript><h1 style="color:red">Javascript is required to view these results!</h1></noscript> + <h1 id="html5_warning" style="color:red; display:none;">Your browser does not support canvas!</h1> + <script>if (!window.HTMLCanvasElement) $("html5_warning").style.display = "block";</script> + <!-- write out the job description --> + <span id="ins_desc"></span> + <script>make_description($("ins_desc"), data["description"]);</script> + <!-- write out clustered motifs --> + <h2 class="mainh pad2" id="data_sec">Motifs</h2> + <div class="box"> + <p>The significant motifs + (E-value ≤ <span id="ins_filter_thresh"></span>) + found by the programs MEME, DREME and CentriMo; + clustered by similarity and ordered by E-value.</p> + <script>$("ins_filter_thresh").innerHTML = data["filter_thresh"]; </script> + <div class="motifbox"> + <span class="action" onclick="show_all(true)">Expand All Clusters</span> + <span class="action" onclick="show_all(false)">Collapse All Clusters</span> + </div> + <div id="logos"></div> + <script>make_clustered_motifs($("logos"));</script> + </div> + <!-- write out a list 
of all programs run --> + <h2 class="mainh pad2" id="programs_sec">Programs</h2> + <div class="box" id="program_listing"></div> + <script>make_program_listing($("program_listing"));</script> + <!-- write out input files --> + <h2 id="input_sec" class="mainh pad2">Input Files</h2> + <div id="input_files" class="box"> + <div id="sequence_db"></div> + <script>make_sequence_db_listing('sequence_db', "Primary Sequences");</script> + <div id="neg_sequence_db"></div> + <script>make_sequence_db_listing('neg_sequence_db', "Control Sequences");</script> + <h4 id="motif_dbs_header">Motifs</h4> + <div id="motif_dbs"></div> + <script>make_motif_db_listing($("motif_dbs"));</script> + </div> + <!-- list information on this program --> + <div id="info_sec" class="bar"> + <div class="subsection"> + <h5 id="version">MEME-ChIP version</h5> + <span id="ins_version"></span> + (Release date: <span id="ins_release"></span>)<br> + </div> + <script> + $("ins_version").innerHTML = data["version"]; + $("ins_release").innerHTML = data["release"]; + </script> + <div class="subsection"> + <h5 id="reference">Reference</h5> + <div class="subsection"> + <h5 id="command">Command line summary</h5> + <textarea id="cmd" rows="5" style="width:100%;" readonly="readonly"> + </textarea> + <script>$("cmd").value = data["cmd"].join(" ");</script> + </div> + </div> + </div> + <div id="scrollpad"></div> + </body> +</html>
<!-- tool_data_table_conf.xml.sample: declares the data table that lists installed MEME motif database directories. -->
<tables>
    <!-- Location of meme_motif_databases -->
    <table name="meme_motif_databases" comment_char="#" allow_duplicate_entries="False">
        <!-- Column order must match the TAB-separated fields documented in
             meme_motif_databases.loc.sample: value, name, path, description. -->
        <columns>value, name, path, description</columns>
        <file path="tool-data/meme_motif_databases.loc" />
    </table>
</tables>