changeset 12:ce96e302ee92 draft default tip

Uploaded
author dongjun
date Thu, 10 Jan 2013 17:33:09 -0500
parents 002dc17c9824
children
files mosaics.xml mosaics_1.5.4.tar.gz mosaics_wrapper.pl
diffstat 3 files changed, 412 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mosaics.xml	Thu Jan 10 17:33:09 2013 -0500
@@ -0,0 +1,313 @@
+<tool id="MOSAiCS" name="MOSAiCS: MOdel-based one and two Sample Analysis and inference for ChIP-Seq Data" version="2.0.0">
+  
+  <description></description>
+	
+  <parallelism method="basic"></parallelism>
+  
+  <requirements>
+	  <requirement type="binary">R</requirement>
+  </requirements>
+
+  <command interpreter="perl">
+    mosaics_wrapper.pl 
+      ## ChIP sample: dataset path, then its file format
+      $readFileType.chipParams.chip
+      $readFileType.chipParams.chipFileFormat
+      ## control sample: dataset path, then its file format
+      $readFileType.controlParams.control
+      $readFileType.controlParams.controlFileFormat
+      ## peak calling result: output path, then its file format
+      $out_peak
+      $OutfileFormat
+      ## analysis type: always the literal IO (the wrapper rejects anything else)
+      IO
+      ## optional report outputs; each one has a filter in the outputs section
+      $report_summary
+      $report_gof
+      $report_exploratory
+      ## required settings, in order: PET, byChr, FDR, fragLen, binSize, capping (values and first options in the inputs section: FALSE, FALSE, 0.05, 200, 200, 0)
+      $readFileType.pet
+      $by_chr
+      $fdrLevel
+      $fragLen
+      $binSize
+      $capping
+      #if $fitParams.fSettingsType == "preSet"
+      ## preset branch: fixed values, in order: signalModel, bgEst, d, maxgap, minsize, thres
+	BIC
+	automatic
+	0.25
+	200
+	50
+	10
+      ## preset for parallel computing, in order: parallel, nCore
+    TRUE
+    8
+      #else
+	$fitParams.signalModel
+	$fitParams.bgEst
+	$fitParams.d
+	$fitParams.maxgap
+	$fitParams.minsize	
+	$fitParams.thres
+	$fitParams.parallel
+	$fitParams.nCore
+      #end if
+  </command>
+
+  <inputs>
+  	 <conditional name="readFileType"> <!-- "pet" picks the SET or PET branch; each branch defines its own chipParams and controlParams -->
+		<param name="pet" type="select" label="Paired-end tag (PET) or single-end tag (SET) data">
+			<option value="FALSE">Single-end tag (SET) data</option>
+			<option value="TRUE">Paired-end tag (PET) data</option>
+		</param>
+		<when value="FALSE"> <!-- SET -->	
+			<conditional name="chipParams">
+				<param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
+					<option value="eland_result">Eland result</option>
+					<option value="eland_extended">Eland extended</option>
+					<option value="eland_export">Eland export</option>
+					<option value="bowtie">Bowtie default</option>
+					<option value="sam">SAM</option>
+					<option value="bed">BED</option>
+					<option value="csem">CSEM</option>
+				</param>
+				<when value="eland_result">
+					<param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
+				</when>
+				<when value="eland_extended">
+					<param name="chip" type="data" format="eland" label="Eland extended file for ChIP sample"/>
+				</when>
+				<when value="eland_export">
+					<param name="chip" type="data" format="eland" label="Eland export file for ChIP sample"/>
+				</when>
+				<when value="bowtie">
+					<param name="chip" type="data" label="Bowtie default file for ChIP sample"/> <!-- no format attribute, unlike the sibling branches; NOTE(review): presumably accepts any datatype, confirm -->
+				</when>
+				<when value="sam">
+					<param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
+				</when>
+				<when value="bed">
+					<param name="chip" type="data" format="bed" label="BED file for ChIP sample"/>
+				</when>
+				<when value="csem">
+					<param name="chip" type="data" format="csem" label="CSEM file for ChIP sample"/>
+				</when>
+			</conditional> <!-- chipParams -->
+			
+			<conditional name="controlParams">
+				<param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM file formats for single-end tag (SET) data.">
+					<option value="eland_result">Eland result</option>
+					<option value="eland_extended">Eland extended</option>
+					<option value="eland_export">Eland export</option>
+					<option value="bowtie">Bowtie default</option>
+					<option value="sam">SAM</option>
+					<option value="bed">BED</option>
+					<option value="csem">CSEM</option>
+				</param>
+				<when value="eland_result">
+					<param name="control" type="data" format="eland" label="Eland result file for control sample"/>
+				</when>
+				<when value="eland_extended">
+					<param name="control" type="data" format="eland" label="Eland extended file for control sample"/>
+				</when>
+				<when value="eland_export">
+					<param name="control" type="data" format="eland" label="Eland export file for control sample"/>
+				</when>
+				<when value="bowtie">
+					<param name="control" type="data" label="Bowtie default file for control sample"/> <!-- no format attribute, unlike the sibling branches; NOTE(review): presumably accepts any datatype, confirm -->
+				</when>
+				<when value="sam">
+					<param name="control" type="data" format="sam" label="SAM file for control sample"/>
+				</when>
+				<when value="bed">
+					<param name="control" type="data" format="bed" label="BED file for control sample"/>
+				</when>
+				<when value="csem">
+					<param name="control" type="data" format="csem" label="CSEM file for control sample"/>
+				</when>
+			</conditional> <!-- controlParams -->
+		</when> <!-- end of SET branch -->
+		<when value="TRUE">	<!-- PET -->
+			<conditional name="chipParams">
+				<param name="chipFileFormat" type="select" label="Select file format for ChIP sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
+					<option value="eland_result">Eland result</option>
+					<option value="sam">SAM</option>
+				</param>
+				<when value="eland_result">
+					<param name="chip" type="data" format="eland" label="Eland result file for ChIP sample"/>
+				</when>
+				<when value="sam">
+					<param name="chip" type="data" format="sam" label="SAM file for ChIP sample"/>
+				</when>
+			</conditional> <!-- chipParams -->
+			
+			<conditional name="controlParams">
+				<param name="controlFileFormat" type="select" label="Select file format for control sample" help="MOSAiCS accepts aligned read files as input. MOSAiCS accepts Eland result and SAM file formats for paired-end tag (PET) data.">
+					<option value="eland_result">Eland result</option>
+					<option value="sam">SAM</option>
+				</param>
+				<when value="eland_result">
+					<param name="control" type="data" format="eland" label="Eland result file for control sample"/>
+				</when>
+				<when value="sam">
+					<param name="control" type="data" format="sam" label="SAM file for control sample"/>
+				</when>
+			</conditional> <!-- controlParams -->
+		</when> <!-- end of PET branch -->
+	</conditional><!-- readFileType -->
+	
+	<param name="OutfileFormat" type="select" label="Select file format for peak calling results" help="MOSAiCS can export peak calling results into BED or GFF file formats, or as a table.">
+		<option value="bed">BED</option>
+		<option value="gff">GFF</option>
+		<option value="txt">table</option>
+	</param> <!-- OutfileFormat is also read by change_format on the out_peak output -->
+	<param name="summary" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Summary of model fitting and peak calling" /> <!-- read by the report_summary output filter -->
+	<param name="gof" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Goodness of fit (GOF) plots" /> <!-- read by the report_gof output filter -->
+	<param name="exploratory" type="boolean" truevalue="1" falsevalue="0" display="checkboxes" label="Reports for diagnostics: Plots of exploratory analysis" /> <!-- read by the report_exploratory output filter -->
+	<!-- Analysis settings below are passed positionally to mosaics_wrapper.pl. fragLen and binSize carry min="1" because a zero or negative length is not meaningful and R's error output is discarded by the wrapper. -->
+	<param name="by_chr" type="select" label="Genome-wide analysis or chromosome-wise analysis" help="If genome-wide analysis is used, one model is fitted for all the chromosomes. If chromosome-wise analysis is used, different model is fitted for each chromosome separately." >
+		<option value="FALSE">Genome-wide analysis</option>
+		<option value="TRUE">Chromosome-wise analysis</option>
+	</param>
+	<param name="fdrLevel" type="float" value="0.05" min="0" max="1" label="False discovery rate (FDR)" help="FDR level for peak detection (default: 0.05)" />
+	<param name="fragLen" type="integer" value="200" min="1" label="Average fragment length" help="Default: 200." />
+	<param name="binSize" type="integer" value="200" min="1" label="Bin size" help="By default, bin size equals to the average fragment length." />
+	<param name="capping" type="integer" value="0" label="Maximum number of reads allowed to start at each nucleotide position" help="If non-positive value is specified (e.g., 0), any number of reads are allowed at each nucleotide position (i.e., no filtering). By default, filtering is NOT used." /> <!-- no min: non-positive values are documented as "no filtering" -->
+	
+	<conditional name="fitParams"> <!-- preset versus user-supplied model fitting and peak calling settings -->
+		<param name="fSettingsType" type="select" label="Settings for model fitting and peak calling" help="For most peak calling applications, use the 'Commonly used' setting. If you want access to all parameters, use 'Full parameter list'.">
+			<option value="preSet">Commonly used</option>
+			<option value="full">Full parameter list</option>
+		</param>
+		<when value="preSet" /> <!-- no form fields: the command template supplies fixed values for this branch -->
+		<when value="full">
+			<param name="signalModel" type="select" label="Signal model" help="By default, signal model is chosen using BIC.">
+				<option value="BIC">Automatic model selection based on BIC</option>
+				<option value="1S">One-signal-component model</option>
+				<option value="2S">Two-signal-component model</option>
+			</param>
+			<param name="bgEst" type="select" label="Background estimation approach" help="By default, background estimation approach is automatically determined based on the data.">
+				<option value="automatic">Automatic selection based on the data</option>
+				<option value="matchLow">Based on bins with low tag counts</option>
+				<option value="rMOM">Robust method of moment (MOM)</option>
+			</param>
+			<param name="d" type="float" value="0.25" label="d" help="Parameter for estimating background distribution. Default is 0.25." />
+			<param name="maxgap" type="integer" value="200" label="maxgap" help="Initial nearby peaks are merged if the distance (in bp) between them is less than 'maxgap'. Default is 200." />
+			<param name="minsize" type="integer" value="50" label="minsize" help="An initial peak is removed if its width is narrower than 'minsize'. Default is 50." />
+			<param name="thres" type="integer" value="10" label="thres" help="A bin within initial peak is removed if its ChIP tag counts are less than 'thres'. Default is 10." />
+			<param name="parallel" type="select" label="Use parallel computing?">
+				<option value="TRUE">Use parallel computing.</option>
+				<option value="FALSE">NOT use parallel computing.</option>
+			</param>
+			<param name="nCore" type="integer" value="8" min="1" label="Number of CPUs" help="Number of CPUs used for parallel computing. Relevant only when parallel computing is used. Default is to use 8 CPUs." /> <!-- min="1": a CPU count below one is not meaningful -->
+		</when> <!-- full -->
+	</conditional> <!-- fitParams -->
+  </inputs>
+
+  <outputs>
+	<data format="tabular" name="out_peak"> <!-- peak list; stays tabular for the "txt" choice, which has no change_format rule -->
+		<change_format>
+			<when input="OutfileFormat" value="bed" format="bed" />
+			<when input="OutfileFormat" value="gff" format="gff" />
+		</change_format>
+	</data>
+	<data format="txt" name="report_summary"> <!-- kept only when the filter on the "summary" checkbox holds -->
+		<filter>summary == 1</filter>
+	</data>
+	<data format="pdf" name="report_gof"> <!-- kept only when the filter on the "gof" checkbox holds -->
+		<filter>gof == 1</filter>
+	</data>
+	<data format="pdf" name="report_exploratory"> <!-- kept only when the filter on the "exploratory" checkbox holds -->
+		<filter>exploratory == 1</filter>
+	</data>
+  </outputs> <!-- mosaics_wrapper.pl treats a report path of "None" as "not requested"; NOTE(review): presumably that is what a filtered-out output renders as, confirm -->
+
+  <help>
+
+**What it does**
+
+MOSAiCS is a statistical framework for the analysis of ChIP-seq data and it stands for MOdel-based one and two Sample Analysis and Inference for ChIP-Seq Data. MOSAiCS is based on a flexible parametric mixture modeling approach for detecting peaks (i.e., enriched regions).
+MOSAiCS is also available in Bioconductor_ as an R package.
+We encourage questions or requests regarding MOSAiCS to be posted on our `Google group`_.
+
+Please cite: Kuan PF, Chung D, Pan G, Thomson JA, Stewart R, and Keles S (2011), "`A statistical framework for the analysis of ChIP-Seq data`_," *Journal of the American Statistical Association*, Vol. 106, pp. 891--903.
+
+.. _Bioconductor: http://www.bioconductor.org/help/bioc-views/2.11/bioc/html/mosaics.html
+.. _Google group: http://groups.google.com/group/mosaics_user_group
+.. _A statistical framework for the analysis of ChIP-Seq data: http://pubs.amstat.org/doi/abs/10.1198/jasa.2011.ap09706
+
+------
+
+**Input formats**
+
+MOSAiCS accepts aligned read files of ChIP and control samples as input. Currently, MOSAiCS accepts Eland result, Eland extended, Eland export, Bowtie default, SAM, BED, and CSEM formats for single-end tag (SET) data. For paired-end tag (PET) data, MOSAiCS accepts Eland result and SAM formats.
+
+------
+
+**Outputs**
+
+Peak calling results of MOSAiCS can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single peak.
+
+If the output is a table, it has the following columns::
+
+   Column    Description
+   --------  --------------------------------------------------------
+     1       Chromosome of the peak
+     2       Start position of the peak
+     3       End position of the peak
+     4       Width of the peak
+     5       Averaged posterior probability of the peak
+     6       Minimum posterior probability of the peak
+     7       Averaged ChIP tag counts of the peak
+     8       Maximum ChIP tag counts of the peak
+     9       Averaged control tag counts of the peak
+    10       Averaged control tag counts of the peak, scaled by sequencing depth
+    11       Averaged log base 2 ratio of ChIP over input tag counts
+
+If the output is in BED format, it has the following columns::
+
+    Column        Description
+    ------------  --------------------------------------------------------
+    1 chrom       Chromosome of the peak
+    2 chromStart  Start position of the peak
+    3 chromEnd    End position of the peak
+    4 name        Always "MOSAiCS_peak"
+    5 score       Averaged ChIP tag counts of the peak
+
+If the output is in GFF format, it has the following columns::
+
+    Column     Description
+    ---------  --------------------------------------------------------
+    1 seqname  Chromosome of the peak
+    2 source   Always "MOSAiCS"
+    3 feature  Always "MOSAiCS_peak"
+    4 start    Start position of the peak
+    5 end      End position of the peak
+    6 score    Averaged ChIP tag counts of the peak
+    7 strand   Always "."
+    8 frame    Always "."
+    9 group    Always "."
+
+------
+
+**Reports for diagnostics**
+
+*Summary of model fitting and peak calling*: This report provides information about input and output files, parameter settings used for model fitting and peak calling, and brief summary of peak calling results. 
+
+*Goodness of fit (GOF) plots*: This report allows visual comparisons of the fits of the background, one-signal-component, and two-signal-component models with the actual data. 
+
+*Plots of exploratory analysis*: This report provides the histograms of ChIP and control samples and the scatter plots of ChIP versus control tag counts.
+
+More details regarding these reports can be found here_:
+
+------
+
+**Settings for model fitting and peak calling**
+
+More details about the tuning of these parameters can be found here_:
+
+.. _here: http://www.bioconductor.org/packages/2.11/bioc/vignettes/mosaics/inst/doc/mosaics-example.pdf
+
+  </help>
+</tool>
Binary file mosaics_1.5.4.tar.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mosaics_wrapper.pl	Thu Jan 10 17:33:09 2013 -0500
@@ -0,0 +1,99 @@
+#!/usr/bin/env perl
+# Wrapper for MOSAiCS
+# Written by Dongjun Chung, Jan. 15, 2013
+# Builds an R script that calls mosaicsRunAll() and pipes it into R.
+use warnings;
+use strict;
+use File::Temp qw/tempfile/;
+use File::Temp qw/tempdir/;
+use File::Basename;
+
+# parse command arguments (24 positional arguments, in the order of the usage message)
+# inactive arguments: useChrfile, chrfile, excludeChr
+# meaningless argument: analysisType (for later use)
+
+die "Usage: perl mosaics_wrapper.pl [chip_path] [chip_file_format] [control_path] [control_file_format] [peak_path] [peak_file_format] [analysis_type] [report_summary_path] [report_gof_path] [report_exploratory_path] [PET?] [chromosome-wise?] [fdr_level] [frag_len] [bin_size] [capping] [signal_model] [bg_est_method] [d] [maxgap] [minsize] [thres] [parallel] [n_core]" unless @ARGV == 24;
+
+my ( $chip_path, $chip_file_format, $control_path, $control_file_format, $peak_path, $peak_file_format, $analysis_type, $report_summary_path, $report_gof_path, $report_exploratory_path, $pet, $by_chr, $fdr_level, $frag_len, $bin_size, $capping, $signal_model, $bg_est_method, $d, $maxgap, $minsize, $thres, $parallel, $n_core ) = @ARGV;
+
+# parse options: analysis type
+# the message goes to STDERR so that it is not mistaken for regular output
+if ( $analysis_type ne "IO" ) {
+	print STDERR "Only 'IO' is supported for analysis type!\n";
+	exit 1;
+}
+
+# a report path of "None" means that the report was not requested
+# parse options: report summary
+
+my $report_summary = "FALSE";
+if ( $report_summary_path ne "None" ) {
+	$report_summary = "TRUE";
+}
+
+# parse options: report GOF
+
+my $report_gof = "FALSE";
+if ( $report_gof_path ne "None" ) {
+	$report_gof = "TRUE";
+}
+
+# parse options: report exploratory analysis
+
+my $report_exploratory = "FALSE";
+if ( $report_exploratory_path ne "None" ) {
+	$report_exploratory = "TRUE";
+}
+
+# write an R script to run
+# bin-level files go to a scratch directory that is removed when this script exits
+my $tempdir_bin = tempdir( CLEANUP => 1 );
+
+my $cmd = qq|
+	suppressPackageStartupMessages(library(mosaics))
+	try( suppressPackageStartupMessages(library(parallel)), silent=TRUE )
+	
+	mosaicsRunAll( 
+		chipFile="$chip_path", 
+		chipFileFormat="$chip_file_format", 
+		controlFile="$control_path", 
+		controlFileFormat="$control_file_format",
+		binfileDir="$tempdir_bin",
+		peakFile="$peak_path", 
+		peakFileFormat="$peak_file_format",
+		reportSummary=$report_summary, 
+		summaryFile="$report_summary_path", 
+		reportExploratory=$report_exploratory,
+		exploratoryFile="$report_exploratory_path", 
+		reportGOF=$report_gof,
+		gofFile="$report_gof_path", 
+		PET=$pet, 
+		byChr=$by_chr, 
+		useChrfile=FALSE, 
+		chrfile=NULL, 
+		excludeChr=NULL, 
+		FDR=$fdr_level, 
+		fragLen=$frag_len, 
+		binSize=$bin_size, 
+		capping=$capping, 
+		bgEst="$bg_est_method",
+		d=$d, 
+		signalModel="$signal_model", 
+		maxgap=$maxgap, 
+		minsize=$minsize, 
+		thres=$thres, 
+		parallel=$parallel,
+		nCore=$n_core )
+	
+	q()
+	|;
+
+# run R, feeding it the script on stdin; R's stdout and stderr are both discarded
+# ("> /dev/null 2>&1" is the POSIX spelling; ">&" is not understood by every /bin/sh)
+open( my $r_fh, '|-', 'R --slave --vanilla > /dev/null 2>&1' ) or die "Couldn't call R!\n";
+print {$r_fh} $cmd, "\n";
+close $r_fh or die "Couldn't finish R!\n";
+
+exit;
+
+