<!--
view findPeaks.xml @ 16:687df269e597 draft

Uploaded
author kevyin
date Wed, 19 Dec 2012 17:28:55 -0500
parents
children
line wrap: on
line source
-->

<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.2">
    <!-- Package dependency declared for this tool: homer, version 4.1. -->
    <requirements>
        <requirement version="4.1" type="package">homer</requirement>
    </requirements>
    <!-- One-line summary of the tool. -->
    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
    <!-- No version command is configured; the element below is intentionally left commented out. -->
    <!--<version_command></version_command>-->
    <!--
        Command template (Cheetah) that runs findPeaks.

        * $tagDir.extra_files_path is passed as the tag directory argument.
        * $options is the free-text "Extra options" parameter; it is left
          unquoted on purpose so that it can expand to several arguments.
        * The peak list is written to $outputPeakFile via the -o option.
        * "-i" is appended only when the optional control tag directory is set.
        * stderr of findPeaks is redirected into the $out_log output.
        * If findPeaks fails, a message is written to stderr and the failure
          is propagated with a non-zero exit status instead of being replaced
          by the (successful) status of echo.

        Dataset paths are single-quoted so that whitespace or glob characters
        in a path cannot split or expand the argument.
    -->
    <command>
        findPeaks '$tagDir.extra_files_path' $options -o '$outputPeakFile'

    #if $control_tagDir:
        -i '$control_tagDir.extra_files_path'
    #end if

        2&gt; '$out_log' || (echo "Error running findPeaks." &gt;&amp;2; exit 1)
    </command>
    <inputs>
        <!-- Tag directory to call peaks on (dataset of the homerTagDirectory format). -->
        <param format="homerTagDirectory" name="tagDir" type="data" label="tag directory" help="Must be made with homer_makeTagDirectory" />
        <!-- Optional control tag directory; when set, the command passes it with "-i". -->
        <param format="homerTagDirectory" name="control_tagDir" type="data" optional="true" label="Control tag directory" help="Must be made with homer_makeTagDirectory" />
        <!--
            Free-text options inserted verbatim into the shell command line.
            Because the value reaches the shell unquoted, only a conservative
            whitelist is accepted: ASCII letters, digits, space and the
            characters - _ . , + (enough for flags such as
            "-style histone -size 500 -F 4.0 -gsize 2e9").
            Shell metacharacters (; | & ` $ ( ) < > etc.) are therefore rejected
            by the sanitizer rather than passed through. A single quote is
            still mapped to __sq__, as before.
        -->
        <param type="text" name="options" label="Extra options" value="" help="See link below for more options">
          <sanitizer>
            <valid initial="string.letters,string.digits">
              <add value=" "/>
              <add value="-"/>
              <add value="_"/>
              <add value="."/>
              <add value=","/>
              <add value="+"/>
            </valid>
            <mapping initial="none">
              <add source="&apos;" target="__sq__"/>
            </mapping>
          </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!-- Disabled HTML index outputs, kept for reference. -->
        <!--<data format="html" name="html_outfile" label="index" />-->
        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->

        <!-- Peak list: target of the "-o" option in the command. -->
        <data name="outputPeakFile"
              format="txt"
              label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />

        <!-- Log: receives the stderr stream redirected by the command. -->
        <data name="out_log"
              format="txt"
              label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
    </outputs>
    <tests>
        <!--
            NOTE(review): placeholder only. The test below declares no params
            and no outputs, so it exercises nothing. The commented-out entries
            name "input_file" and "html_file", which do not match any param or
            output defined in this tool (tagDir, control_tagDir, options /
            outputPeakFile, out_log). TODO: replace with a real test once a
            sample tag directory and expected peak file are available.
        -->
        <test>
            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
        </test>
    </tests>

    <help>

.. class:: infomark

**Homer findPeaks**

For more options, look under: "Command line options for findPeaks"

http://biowhat.ucsd.edu/homer/ngs/peaks.html

TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.

**Parameter list**

Command line options (not all of them are supported)::

	Usage: findPeaks &lt;tag directory&gt; [options]

	Finds peaks in the provided tag directory.  By default, peak list printed to stdout

	General analysis options:
		-o &lt;filename|auto&gt; (file name to output peaks to, default: stdout)
			&quot;-o auto&quot; will send output to &quot;&lt;tag directory&gt;/peaks.txt&quot;, &quot;.../regions.txt&quot;,
			or &quot;.../transcripts.txt&quot; depending on the &quot;-style&quot; option
		-style &lt;option&gt; (Specialized options for specific analysis strategies)
			factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt,  default)
			histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
			groseq (de novo transcript identification from GroSeq data, transcripts.txt)
			tss (TSS identification from 5&apos; RNA sequencing, tss.txt)
			dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)

	chipseq/histone options:
		-i &lt;input tag directory&gt; (Experiment to use as IgG/Input/Control)
		-size &lt;#&gt; (Peak size, default: auto)
		-minDist &lt;#&gt; (minimum distance between peaks, default: peak size x2)
		-gsize &lt;#&gt; (Set effective mappable genome size, default: 2e9)
		-fragLength &lt;#|auto&gt; (Approximate fragment length, default: auto)
		-inputFragLength &lt;#|auto&gt; (Approximate fragment length of input tags, default: auto)
		-tbp &lt;#&gt; (Maximum tags per bp to count, 0 = no limit, default: auto)
		-inputtbp &lt;#&gt; (Maximum tags per bp to count in input, 0 = no limit, default: auto)
		-strand &lt;both|separate&gt; (find peaks using tags on both strands or separate, default:both)
		-norm # (Tag count to normalize to, default 10000000)
		-region (extends start/stop coordinates to cover full region considered &quot;enriched&quot;)
		-center (Centers peaks on maximum tag overlap and calculates focus ratios)
		-nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
			(-center and -nfr can be performed later with &quot;getPeakTags&quot;)

	Peak Filtering options: (set -F/-L/-C to 0 to skip)
		-F &lt;#&gt; (fold enrichment over input tag count, default: 4.0)
		  -P &lt;#&gt; (poisson p-value threshold relative to input tag count, default: 0.0001)
		-L &lt;#&gt; (fold enrichment over local tag count, default: 4.0)
		  -LP &lt;#&gt; (poisson p-value threshold relative to local tag count, default: 0.0001)
		-C &lt;#&gt; (fold enrichment limit of expected unique tag positions, default: 2.0)
		-localSize &lt;#&gt; (region to check for local tag enrichment, default: 10000)
		-inputSize &lt;#&gt; (Size of region to search for control tags, default: 2x peak size)
		-fdr &lt;#&gt; (False discovery rate, default = 0.001)
		-poisson &lt;#&gt; (Set poisson p-value cutoff, default: uses fdr)
		-tagThreshold &lt;#&gt; (Set # of tags to define a peak, default: 25)
		-ntagThreshold &lt;#&gt; (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
		-minTagThreshold &lt;#&gt; (Absolute minimum tags per peak, default: expected tags per peak)

	GroSeq Options: (Need to specify &quot;-style groseq&quot;):
		-tssSize &lt;#&gt; (size of region for initiation detection/artifact size, default: 250)
		-minBodySize &lt;#&gt; (size of region for transcript body detection, default: 1000)
		-maxBodySize &lt;#&gt; (size of region for transcript body detection, default: 10000)
		-tssFold &lt;#&gt; (fold enrichment for new initiation detection, default: 4.0)
		-bodyFold &lt;#&gt; (fold enrichment for new transcript detection, default: 4.0)
		-endFold &lt;#&gt; (end transcript when levels are this much less than the start, default: 10.0)
		-fragLength &lt;#&gt; (Approximate fragment length, default: 150)
		-uniqmap &lt;directory&gt; (directory of binary files specifying uniquely mappable locations)
			Download from http://biowhat.ucsd.edu/homer/groseq/
		-confPvalue &lt;#&gt; (confidence p-value: 1.00e-05)
		-minReadDepth &lt;#&gt; (Minimum initial read depth for transcripts, default: auto)
		-pseudoCount &lt;#&gt; (Pseudo tag count, default: 2.0)
		-gtf &lt;filename&gt; (Output de novo transcripts in GTF format)
			&quot;-o auto&quot; will produce &lt;dir&gt;/transcripts.txt and &lt;dir&gt;/transcripts.gtf
    </help>
</tool>