diff macs21_wrapper.xml @ 0:06cb587a5e87 draft

Uploaded initial version 2.1.0-4.
author pjbriggs
date Tue, 30 Jun 2015 08:16:18 -0400
parents
children 02a01ea54722
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macs21_wrapper.xml	Tue Jun 30 08:16:18 2015 -0400
@@ -0,0 +1,398 @@
+<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-4">
+  <description>Model-based Analysis of ChIP-Seq: peak calling</description>
+  <requirements>
+    <requirement type="package" version="2.7">python</requirement>
+    <requirement type="package" version="1.9">numpy</requirement>
+    <requirement type="package" version="2.1.0.20140616">macs2</requirement>
+    <requirement type="package" version="3.1.2">R</requirement>
+    <requirement type="package" version="1.0">ucsc_tools_for_macs21</requirement>
+  </requirements>
+  <version_command>macs2 --version</version_command>
+  <command interpreter="python">
+    macs21_wrapper.py callpeak
+    ##
+    ## ChIP-seq input
+    $input_chipseq_file1
+    ##
+    ## ChIP-seq control
+    #if str($input_control_file1) != 'None'
+       -c $input_control_file1
+    #end if
+    ##
+    --format=$input_chipseq_file1.extension
+    --name="$experiment_name"
+    --bw=$bw
+    ##
+    ## Genome size
+    #if str($genome_size.gsize) == ''
+       --gsize=$genome_size.user_defined_gsize
+    #else:
+       --gsize=$genome_size.gsize
+    #end if
+    ##
+    ## Broad peaks
+    #if str($broad_options.broad_regions) == 'broad'
+       --broad --broad-cutoff=$broad_options.broad_cutoff
+    #end if
+    ##
+    ## (no)model options
+    #if str($nomodel_type.nomodel_type_selector) == 'nomodel'
+       --nomodel --extsize=$nomodel_type.extsize
+    #end if
+    ##
+    ## pq value select options
+    #if str($pq_options.pq_options_selector) == 'qvalue'
+       --qvalue=$pq_options.qvalue
+    #else
+       --pvalue=$pq_options.pvalue
+    #end if
+    ##
+    ## Bedgraph options
+    #if $bdg_options.bdg
+       -B $bdg_options.spmr
+    #end if
+    ##
+    ## Advanced options
+    #if $advanced_options.advanced_options_selector
+       --mfold $advanced_options.mfoldlo $advanced_options.mfoldhi
+       $advanced_options.nolambda
+       $advanced_options.call_summits
+       #if str($advanced_options.keep_duplicates.keep_dup) == ''
+          --keep-dup $advanced_options.keep_duplicates.maximum_tags
+       #else
+          --keep-dup $advanced_options.keep_duplicates.keep_dup
+       #end if
+    #else
+       ## Defaults if advanced options not set
+       --mfold 10 30 --keep-dup 1
+    #end if
+    ##
+    ## Output files
+    --output-summits=$output_summits_bed_file
+    --output-extra-files=$output_extra_files
+    --output-extra-files-path=$output_extra_files.files_path
+    ##
+    ## Narrow/broad peak outputs
+    #if str($broad_options.broad_regions) == 'broad'
+       --output-broadpeaks=$output_broadpeaks_file
+       --output-gappedpeaks=$output_gappedpeaks_file
+    #else
+       --output-narrowpeaks=$output_narrowpeaks_file
+    #end if
+    ##
+    ## Bedgraph outputs
+    #if $bdg_options.bdg
+       --output-pileup=$output_treat_pileup_file 
+       --output-lambda-bedgraph=$output_lambda_bedgraph_file
+       #if $bdg_options.make_bigwig
+          --output-bigwig=$output_bigwig_file
+          --length=$GALAXY_DATA_INDEX_DIR/shared/ucsc/chrom/${input_chipseq_file1.dbkey}.len
+       #end if
+    #end if
+    ##
+    ## XLS/interval output
+    #if str($xls_to_interval) == 'True'
+       --output-xls-to-interval=$output_xls_to_interval_peaks_file
+    #else
+       --output-peaks=$output_peaks_file
+    #end if
+  </command>
+  <inputs>
+    <!--experiment name used as base for output file names -->
+    <param name="experiment_name" type="text" value="MACS2.1.0 in Galaxy" size="50"
+	   label="Experiment Name"/>
+    <!--choose 'broad' or 'narrow' regions-->
+    <conditional name="broad_options">
+      <param name="broad_regions" type="select" label="Type of region to call"
+	     help="Broad regions are formed by linking nearby enriched regions">
+	<option value="" selected="true">Narrow regions</option>
+	<option value="broad">Broad regions</option>
+      </param>
+      <when value="broad">
+	<param name="broad_cutoff" type="float"
+	       label="Cutoff for broad regions"
+	       value="0.1" help="default: 0.1 (--broad-cutoff)"/>
+      </when>
+    </conditional>
+    <param name="input_chipseq_file1" type="data" format="bed,sam,bam"
+	   label="ChIP-seq read file" />
+    <param name="input_control_file1" type="data" format="bed,sam,bam" optional="True"
+	   label="ChIP-seq control read file" />
+    <conditional name="genome_size">
+      <param name="gsize" type="select" label="Effective genome size"
+	     help="Either pre-defined (for common organisms), or user-defined (--gsize)">
+	<option value="hs" selected="true">Human (2.7e9)</option>
+	<option value="mm">Mouse (1.87e9)</option>
+	<option value="ce">C. elegans (9e7)</option>
+	<option value="dm">Fruitfly (1.2e8)</option>
+	<option value="">User-defined</option>
+      </param>
+      <when value="">
+	<!-- User-defined effective genome size -->
+	<param name="user_defined_gsize" type="float" value=""
+	       label="Enter effective genome size (number of bases)"
+	       help="e.g. '1.0e+9' or '1000000000'" />
+      </when>
+    </conditional>
+    <param name="bw" type="integer" label="Band width" value="300" help="(--bw)"/>
+    <param name="xls_to_interval" label="Include XLS file from MACS"
+	   type="boolean" truevalue="True" falsevalue="False" checked="True"
+	   help="MACS2 XLS file will be output to the history in 'interval' format (suitable for subsequent analysis in Galaxy). Note that start positions are 1-based."/>
+
+    <conditional name="bdg_options">
+      <param name="bdg"
+	     label="Save treatment and control lambda pileups in bedGraph"
+	     type="boolean" truevalue="-B" falsevalue="" checked="False" />
+      <when value="-B">
+	<param name="spmr"
+	       type="boolean" truevalue="--SPMR" falsevalue="" checked="False"
+	       label="Save signal per million reads for fragment pileup profiles"
+	       help="(--SPMR)" />
+	<param name="make_bigwig" type="boolean" checked="True"
+	       truevalue="True" falsevalue=""
+	       label="Also generate bigWig file from bedGraph"
+	       help="bigWig file can used in subsequent analyses e.g. CEAS" />
+      </when>
+      <when value="">
+	<!-- Display nothing -->
+      </when>
+    </conditional>
+
+    <conditional name="pq_options">
+      <param name="pq_options_selector" type="select"
+	     label="Select p-value or q-value" help="default uses q-value">
+	<option value="qvalue">q-value</option>
+	<option value="pvalue">p-value</option>
+      </param>
+      <when value="pvalue">
+	<param name="pvalue" type="float"
+	       label="p-value cutoff for binding region detection"
+	       value="1e-2" help="default: 1e-2 (--pvalue)"/>
+      </when>
+      <when value="qvalue">
+	<param name="qvalue" type="float"
+	       label="q-value cutoff for binding region detection"
+	       value="0.01" help="default: 0.01 (--qvalue)"/>
+      </when>
+    </conditional>
+    <conditional name="advanced_options">
+      <param name="advanced_options_selector"
+	     type="boolean" truevalue="on" falsevalue="off" checked="False"
+	     label="Use advanced options?" />
+      <when value="on">
+        <param name="mfoldlo" type="integer"
+	       label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (lower-limit)"
+	       value="10" help="(--mfold)"/>
+	<param name="mfoldhi" type="integer"
+	       label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (upper-limit)"
+	       value="30" help="(--mfold)"/>
+	<param name="nolambda"
+	       label="Use fixed background lambda as local lambda for every binding region"
+	       type="boolean" truevalue="--nolambda" falsevalue="" checked="False"
+	       help="(--nolambda)"/>
+	<param name="call_summits"
+	       label="Detect subpeaks within binding region"
+	       type="boolean" truevalue="--call-summits" falsevalue="" checked="False"
+	       help="(--call-summits)"/>
+	<conditional name="keep_duplicates">
+	  <param name="keep_dup" type="select"
+		 label="Use of duplicate reads">
+	    <option value="auto">Automatically calculate maximum number of duplicates to keep (auto)</option>
+	    <option value="all">Use all duplicates (all)</option>
+	    <option value="" selected="true">Manually specify maxium number of duplicates</option>
+	  </param>
+	  <when value="">
+	    <param name="maximum_tags" type="integer" value="1"
+		   label="Maxium number of duplicated tags to keep at each location"/>
+	  </when>
+	</conditional>
+      </when>
+      <when value="off">
+	<!--display nothing-->
+      </when>
+    </conditional>
+    <conditional name="nomodel_type">
+      <param name="nomodel_type_selector" type="select" label="Build Model">
+	<option value="nomodel">Do not build the shifting model (--nomodel enabled)</option>
+        <option value="create_model" selected="true">Build the shifting model (--nomodel disabled)</option>
+      </param>
+      <when value="nomodel">
+        <param name="extsize" type="integer" label="Arbitrary extension size in bp" value="200" help="Used as fragment size to extend each read towards 3' end (--extsize)"/>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <!--callpeaks output-->
+    <data name="output_extra_files" format="html"
+	  label="${tool.name}: callpeak on ${on_string} (html report)">
+    </data>
+    <data name="output_summits_bed_file" format="bed"
+	  label="${tool.name}: callpeak on ${on_string} (summits: bed)">
+    </data>
+    <data name="output_peaks_file" format="xls"
+	  label="${tool.name}: callpeak on ${on_string} (peaks: xls)">
+      <filter>xls_to_interval is False</filter>
+    </data>
+    <data name="output_narrowpeaks_file" format="interval"
+	  label="${tool.name}: callpeak on ${on_string} (peaks: narrowPeak)">
+      <filter>broad_options['broad_regions'] == ''</filter>
+    </data>
+    <data name="output_broadpeaks_file" format="interval"
+	  label="${tool.name}: callpeak on ${on_string} (peaks: broadPeak)">
+      <filter>broad_options['broad_regions'] == 'broad'</filter>
+    </data>
+    <data name="output_gappedpeaks_file" format="interval"
+	  label="${tool.name}: callpeak on ${on_string} (peaks: gappedPeak)">
+      <filter>broad_options['broad_regions'] == 'broad'</filter>
+    </data>
+    <data name="output_xls_to_interval_peaks_file" format="interval"
+	  label="${tool.name}: callpeak on ${on_string} (peaks: interval)">
+      <filter>xls_to_interval is True</filter>
+    </data>
+    <data name="output_treat_pileup_file" format="bedgraph"
+	  label="${tool.name}: callpeak on ${on_string} (treat pileup: bedGraph)">
+      <filter>bdg_options['bdg'] is True</filter>
+    </data>
+    <data name="output_lambda_bedgraph_file" format="bedgraph"
+	  label="${tool.name}: callpeak on ${on_string} (control lambda: bedGraph)">
+      <filter>bdg_options['bdg'] is True</filter>
+    </data>
+    <data name="output_bigwig_file" format="bigwig"
+	  label="${tool.name}: callpeak on ${on_string} (treat pileup: bigWig)">
+      <filter>bdg_options['bdg'] is True</filter>
+      <filter>bdg_options['make_bigwig'] is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <!-- Peak calling without bigwig output -->
+    <test>
+      <!-- Inputs -->
+      <param name="experiment_name" value="test_MACS2.1.0" />
+      <param name="broad_regions" value="" />
+      <param name="input_chipseq_file1" value="test_region_IP.bed" dbkey="galGal3"
+	     ftype="bed" />
+      <param name="input_control_file1" value="test_region_Input.bed"
+	     ftype="bed" />
+      <param name="gsize" value="" />
+      <param name="user_defined_gsize" value="775000000.0" />
+      <param name="bw" value="300" />
+      <param name="xls_to_interval" value="true" />
+      <param name="bdg_options|bdg" value="-B" />
+      <param name="bdg_options|spmr" value="--SPMR" />
+      <param name="bdg_options|make_bigwig" value="false" />
+      <param name="pq_options_selector" value="qvalue" />
+      <param name="qvalue" value="0.05" />
+      <param name="advanced_options_selector" value="true" />
+      <param name="advanced_options|mfoldlo" value="5" />
+      <param name="advanced_options|mfoldhi" value="50" />
+      <param name="advanced_options|nolambda" value="" />
+      <param name="advanced_options|call_summits" value="" />
+      <param name="advanced_options|keep_duplicates" value="" />
+      <param name="advanced_options|maximum_tags" value="1" />
+      <param name="nomodel_type_selector" value="nomodel" />
+      <param name="nomodel_type|extsize" value="243" />
+      <!-- Outputs -->
+      <output name="output_extra_files" file="test_MACS2.1.0_html_report.zip"
+	      compare="sim_size" delta="1500" />
+      <output name="output_summits_bed_file" file="test_MACS2.1.0_summits.bed" />
+      <output name="output_narrowpeaks_file" file="test_MACS2.1.0_peaks_narrowPeak.interval" />
+      <output name="output_xls_to_interval_peaks_file"
+	      file="test_MACS2.1.0_peaks.xls.re_match"
+	      compare="re_match" lines_diff="1" />
+      <output name="output_treat_pileup_file" file="test_MACS2.1.0_treat_pileup.bdg" />
+      <output name="output_lambda_bedgraph_file" file="test_MACS2.1.0_control_lambda.bdg" />
+    </test>
+    <!-- Peak calling with bigwig output -->
+    <test>
+      <!-- Inputs -->
+      <param name="experiment_name" value="test_MACS2.1.0" />
+      <param name="broad_regions" value="" />
+      <param name="input_chipseq_file1" value="test_region_IP.bed" dbkey="galGal3"
+	     ftype="bed" />
+      <param name="input_control_file1" value="test_region_Input.bed"
+	     ftype="bed" />
+      <param name="gsize" value="" />
+      <param name="user_defined_gsize" value="775000000.0" />
+      <param name="bw" value="300" />
+      <param name="xls_to_interval" value="true" />
+      <param name="bdg_options|bdg" value="-B" />
+      <param name="bdg_options|spmr" value="--SPMR" />
+      <param name="bdg_options|make_bigwig" value="true" />
+      <param name="pq_options_selector" value="qvalue" />
+      <param name="qvalue" value="0.05" />
+      <param name="advanced_options_selector" value="true" />
+      <param name="advanced_options|mfoldlo" value="5" />
+      <param name="advanced_options|mfoldhi" value="50" />
+      <param name="advanced_options|nolambda" value="" />
+      <param name="advanced_options|call_summits" value="" />
+      <param name="advanced_options|keep_duplicates" value="" />
+      <param name="advanced_options|maximum_tags" value="1" />
+      <param name="nomodel_type_selector" value="nomodel" />
+      <param name="nomodel_type|extsize" value="243" />
+      <!-- Outputs -->
+      <output name="output_extra_files" file="test_MACS2.1.0_bw_html_report.zip"
+	      compare="sim_size" delta="2500" />
+      <output name="output_summits_bed_file" file="test_MACS2.1.0_summits.bed" />
+      <output name="output_narrowpeaks_file" file="test_MACS2.1.0_peaks_narrowPeak.interval" />
+      <output name="output_xls_to_interval_peaks_file"
+	      file="test_MACS2.1.0_peaks.xls.re_match"
+	      compare="re_match" lines_diff="1" />
+      <output name="output_treat_pileup_file" file="test_MACS2.1.0_treat_pileup.bdg" />
+      <output name="output_lambda_bedgraph_file" file="test_MACS2.1.0_control_lambda.bdg" />
+      <output name="output_bigwig_file" file="test_MACS2.1.0_treat_pileup.bw"
+	      compare="sim_size" />
+    </test>
+  </tests>
+  <help>
+**What it does**
+
+MACS (Model-based Analysis of ChIP-seq) 2.1.0 provides algorithms for identifying
+transcript factor binding sites. The program can be used either for ChIP-Seq data alone,
+or with control sample data to improve specificity.
+
+View the MACS2 documentation at:
+https://github.com/taoliu/MACS/blob/master/README.rst
+
+------
+
+**Usage**
+
+The tool interfaces with the **callpeak** function in MACS, which calls peaks from
+alignment results.
+
+------
+
+**Credits**
+
+This Galaxy tool was based on the MACS2 tool hosted in the Galaxy toolshed at
+
+ * http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2
+
+(specifically the 16:14f378e35191 revision of the tool) which is credited to Ziru
+Zhou. This version is a reimplemented version developed within the Bioinformatics
+Core Facility at the University of Manchester, which uses more up-to-date Galaxy
+syntax and adds some extra features.
+
+The tool runs Tao Liu's MACS2 software:
+
+ * https://github.com/taoliu/MACS
+
+The reference for MACS is:
+
+ * Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C,
+   Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS).
+   Genome Biol. 2008;9(9):R137.
+
+Please kindly acknowledge both this Galaxy tool and the MACS2 package if you
+use it.
+  </help>
+  <citations>
+    <!--
+    See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+    Can be either DOI or Bibtex
+    Use http://www.bioinformatics.org/texmed/ to convert PubMed to Bibtex
+    -->
+    <citation type="doi">10.1186/gb-2008-9-9-r137</citation>
+  </citations>
+</tool>