Mercurial > repos > kevyin > homer

--- a/README	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-Homer wrapper for Galaxy
-
-The HOMER tools must be accessible from the command line
-
-Code repo: https://bitbucket.org/gvl/homer
-
-=========================================:
-LICENSE for this wrapper:
-=========================================:
-Kevin Ying
-Garvan Institute: http://www.garvan.org.au
-GVL: https://genome.edu.au/wiki/GVL
-
-http://opensource.org/licenses/mit-license.php
-
--- a/annotatePeaks.xml	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.4">
-    <requirements>
-        <requirement type="package" version="4.1">homer</requirement>
-    </requirements>
-    <description></description>
-    <!--<version_command></version_command>-->
-    <command>
-        annotatePeaks.pl $input_bed $genome_selector $options 1&gt; $out_annotated
-        2&gt; $out_log || echo "Error running annotatePeaks." >&amp;2
-    </command>
-    <inputs>
-        <param format="tabular,bed" name="input_bed" type="data" label="Homer peaks OR BED format"/>
-        <param name="genome_selector" type="select" label="Genome version">
-            <option value="hg19" selected="true">hg19</option>
-        </param>
-        <param type="text" name="options" label="Extra options" value="" help="See link below for more options">
-          <sanitizer>
-            <valid initial="string.printable">
-             <remove value="&apos;"/>
-             <remove value="/"/>
-            </valid>
-            <mapping initial="none">
-              <add source="&apos;" target="__sq__"/>
-            </mapping>
-          </sanitizer>
-        </param>
-    </inputs>
-    <outputs>
-        <!--<data format="html" name="html_outfile" label="index" />-->
-        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
-        <data format="tabular" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
-        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
-    </outputs>
-    <tests>
-        <test>
-            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
-            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
-        </test>
-    </tests>
-
-    <help>
-
-        .. class:: infomark
-
-        **Homer annotatePeaks**
-
-        More information on accepted formats and options
-
-        http://biowhat.ucsd.edu/homer/ngs/annotation.html
-
-        TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.
-
-**Parameter list**
-
-Command line options (not all of them are supported)::
-
-	Usage: annotatePeaks.pl &lt;peak file | tss&gt; &lt;genome version&gt;  [additional options...]
-
-	Available Genomes (required argument): (name,org,directory,default promoter set)
-			-- or --
-		Custom: provide the path to genome FASTA files (directory or single file)
-
-	User defined annotation files (default is UCSC refGene annotation):
-		annotatePeaks.pl accepts GTF (gene transfer formatted) files to annotate positions relative
-		to custom annotations, such as those from de novo transcript discovery or Gencode.
-		-gtf &lt;gtf format file&gt; (-gff and -gff3 can work for those files, but GTF is better)
-
-	Peak vs. tss/tts/rna mode (works with custom GTF file):
-		If the first argument is &quot;tss&quot; (i.e. annotatePeaks.pl tss hg18 ...) then a TSS centric
-		analysis will be carried out.  Tag counts and motifs will be found relative to the TSS.
-		(no position file needed) [&quot;tts&quot; now works too - e.g. 3&apos; end of gene]
-		[&quot;rna&quot; specifies gene bodies, will automatically set &quot;-size given&quot;]
-		NOTE: The default TSS peak size is 4000 bp, i.e. +/- 2kb (change with -size option)
-		-list &lt;gene id list&gt; (subset of genes to perform analysis [unigene, gene id, accession,
-			 probe, etc.], default = all promoters)
-		-cTSS &lt;promoter position file i.e. peak file&gt; (should be centered on TSS)
-
-	Primary Annotation Options:
-		-mask (Masked repeats, can also add &apos;r&apos; to end of genome name)
-		-m &lt;motif file 1&gt; [motif file 2] ... (list of motifs to find in peaks)
-			-mscore (reports the highest log-odds score within the peak)
-			-nmotifs (reports the number of motifs per peak)
-			-mdist (reports distance to closest motif)
-			-mfasta &lt;filename&gt; (reports sites in a fasta file - for building new motifs)
-			-fm &lt;motif file 1&gt; [motif file 2] (list of motifs to filter from above)
-			-rmrevopp &lt;#&gt; (only count sites found within &lt;#&gt; on both strands once, i.e. palindromic)
-			-matrix &lt;prefix&gt; (outputs a motif co-occurrence files:
-				prefix.count.matrix.txt - number of peaks with motif co-occurrence
-				prefix.ratio.matrix.txt - ratio of observed vs. expected  co-occurrence
-				prefix.logPvalue.matrix.txt - co-occurrence enrichment
-				prefix.stats.txt - table of pair-wise motif co-occurrence statistics
-				additional options:
-				-matrixMinDist &lt;#&gt; (minimum distance between motif pairs - to avoid overlap)
-				-matrixMaxDist &lt;#&gt; (maximum distance between motif pairs)
-			-mbed &lt;filename&gt; (Output motif positions to a BED file to load at UCSC (or -mpeak))
-			-mlogic &lt;filename&gt; (will output stats on common motif orientations)
-		-d &lt;tag directory 1&gt; [tag directory 2] ... (list of experiment directories to show
-			tag counts for) NOTE: -dfile &lt;file&gt; where file is a list of directories in first column
-		-bedGraph &lt;bedGraph file 1&gt; [bedGraph file 2] ... (read coverage counts from bedGraph files)
-		-wig &lt;wiggle file 1&gt; [wiggle file 2] ... (read coverage counts from wiggle files)
-		-p &lt;peak file&gt; [peak file 2] ... (to find nearest peaks)
-			-pdist to report only distance (-pdist2 gives directional distance)
-			-pcount to report number of peaks within region
-		-vcf &lt;VCF file&gt; (annotate peaks with genetic variation information, one col per individual)
-			-editDistance (Computes the # bp changes relative to reference)
-			-individuals &lt;name1&gt; [name2] ... (restrict analysis to these individuals)
-		-gene &lt;data file&gt; ... (Adds additional data to result based on the closest gene.
-			This is useful for adding gene expression data.  The file must have a header,
-			and the first column must be a GeneID, Accession number, etc.  If the peak
-			cannot be mapped to data in the file then the entry will be left empty.
-		-go &lt;output directory&gt; (perform GO analysis using genes near peaks)
-		-genomeOntology &lt;output directory&gt; (perform genomeOntology analysis on peaks)
-			-gsize &lt;#&gt; (Genome size for genomeOntology analysis, default: 2e9)
-
-	Annotation vs. Histogram mode:
-		-hist &lt;bin size in bp&gt; (i.e 1, 2, 5, 10, 20, 50, 100 etc.)
-		The -hist option can be used to generate histograms of position dependent features relative
-		to the center of peaks.  This is primarily meant to be used with -d and -m options to map
-		distribution of motifs and ChIP-Seq tags.  For ChIP-Seq peaks for a Transcription factor
-		you might want to use the -center option (below) to center peaks on the known motif
-		** If using &quot;-size given&quot;, histogram will be scaled to each region (i.e. 0-100%), with
-		the -hist parameter being the number of bins to divide each region into.
-			Histogram Mode specific Options:
-			-nuc (calculated mononucleotide frequencies at each position,
-				Will report by default if extracting sequence for other purposes like motifs)
-			-di (calculated dinucleotide frequencies at each position)
-			-histNorm &lt;#&gt; (normalize the total tag count for each region to 1, where &lt;#&gt; is the
-				minimum tag total per region - use to avoid tag spikes from low coverage
-			-ghist (outputs profiles for each gene, for peak shape clustering)
-			-rm &lt;#&gt; (remove occurrences of same motif that occur within # bp)
-
-	Peak Centering: (other options are ignored)
-		-center &lt;motif file&gt; (This will re-center peaks on the specified motif, or remove peak
-			if there is no motif in the peak.  ONLY recentering will be performed, and all other
-			options will be ignored.  This will output a new peak file that can then be reanalyzed
-			to reveal fine-grain structure in peaks (It is advised to use -size &lt; 200) with this
-			to keep peaks from moving too far (-mirror flips the position)
-		-multi (returns genomic positions of all sites instead of just the closest to center)
-
-	Advanced Options:
-		-len &lt;#&gt; / -fragLength &lt;#&gt; (Fragment length, default=auto, might want to set to 0 for RNA)
-		-size &lt;#&gt; (Peak size[from center of peak], default=inferred from peak file)
-			-size #,# (i.e. -size -10,50 count tags from -10 bp to +50 bp from center)
-			-size &quot;given&quot; (count tags etc. using the actual regions - for variable length regions)
-		-log (output tag counts as log2(x+1+rand) values - for scatter plots)
-		-sqrt (output tag counts as sqrt(x+rand) values - for scatter plots)
-		-strand &lt;+|-|both&gt; (Count tags on specific strands relative to peak, default: both)
-		-pc &lt;#&gt; (maximum number of tags to count per bp, default=0 [no maximum])
-		-cons (Retrieve conservation information for peaks/sites)
-		-CpG (Calculate CpG/GC content)
-		-ratio (process tag values as ratios - i.e. chip-seq, or mCpG/CpG)
-		-nfr (report nucleosome free region scores instead of tag counts, also -nfrSize &lt;#&gt;)
-		-norevopp (do not search for motifs on the opposite strand [works with -center too])
-		-noadj (do not adjust the tag counts based on total tags sequenced)
-		-norm &lt;#&gt; (normalize tags to this tag count, default=1e7, 0=average tag count in all directories)
-		-pdist (only report distance to nearest peak using -p, not peak name)
-		-map &lt;mapping file&gt; (mapping between peak IDs and promoter IDs, overrides closest assignment)
-		-noann, -nogene (skip genome annotation step, skip TSS annotation)
-		-homer1/-homer2 (by default, the new version of homer [-homer2] is used for finding motifs)
-
-
-    </help>
-</tool>
-
--- a/bed2pos.xml	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-<tool id="homer_bed2pos" name="homer_bed2pos" version="0.0.3">
-    <requirements>
-        <requirement type="package" version="4.1">homer</requirement>
-    </requirements>
-    <description></description>
-    <!--<version_command></version_command>-->
-    <command>
-        bed2pos.pl $input_bed 1&gt; $out_pos
-        2&gt; $out_log || echo "Error running bed2pos." >&amp;2
-    </command>
-    <inputs>
-        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
-    </inputs>
-    <outputs>
-        <!--<data format="html" name="html_outfile" label="index" />-->
-        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
-        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
-        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
-    </outputs>
-    <tests>
-        <test>
-            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
-            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
-        </test>
-    </tests>
-
-    <help>
-        .. class:: infomark
-
-        Converts: BED -(to)-> homer peak positions
-
-        **Homer bed2pos.pl**
-
-        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
-    </help>
-</tool>
-
--- a/findPeaks.xml	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.2">
-    <requirements>
-        <requirement type="package" version="4.1">homer</requirement>
-    </requirements>
-    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
-    <!--<version_command></version_command>-->
-    <command>
-        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile
-
-    #if $control_tagDir:
-        -i $control_tagDir.extra_files_path
-    #end if
-
-        2&gt; $out_log || echo "Error running findPeaks." >&amp;2
-    </command>
-    <inputs>
-        <param format="homerTagDirectory" name="tagDir" type="data" label="tag directory" help="Must be made with homer_makeTagDirectory" />
-        <param format="homerTagDirectory" name="control_tagDir" type="data" optional="True" label="Control tag directory" help="Must be made with homer_makeTagDirectory" />
-        <param type="text" name="options" label="Extra options" value="" help="See link below for more options">
-          <sanitizer>
-            <valid initial="string.printable">
-             <remove value="&apos;"/>
-             <remove value="/"/>
-            </valid>
-            <mapping initial="none">
-              <add source="&apos;" target="__sq__"/>
-            </mapping>
-          </sanitizer>
-        </param>
-    </inputs>
-    <outputs>
-        <!--<data format="html" name="html_outfile" label="index" />-->
-        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
-        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
-        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
-    </outputs>
-    <tests>
-        <test>
-            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
-            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
-        </test>
-    </tests>
-
-    <help>
-
-  .. class:: infomark
-
-  **Homer findPeaks**
-
-  For more options, look under: "Command line options for findPeaks"
-
-  http://biowhat.ucsd.edu/homer/ngs/peaks.html
-
-  TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.
-
-**Parameter list**
-
-Command line options (not all of them are supported)::
-
-	Usage: findPeaks &lt;tag directory&gt; [options]
-
-	Finds peaks in the provided tag directory.  By default, peak list printed to stdout
-
-	General analysis options:
-		-o &lt;filename|auto&gt; (file name for output peaks, default: stdout)
-			&quot;-o auto&quot; will send output to &quot;&lt;tag directory&gt;/peaks.txt&quot;, &quot;.../regions.txt&quot;,
-			or &quot;.../transcripts.txt&quot; depending on the &quot;-style&quot; option
-		-style &lt;option&gt; (Specialized options for specific analysis strategies)
-			factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt,  default)
-			histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
-			groseq (de novo transcript identification from GroSeq data, transcripts.txt)
-			tss (TSS identification from 5&apos; RNA sequencing, tss.txt)
-			dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)
-
-	chipseq/histone options:
-		-i &lt;input tag directory&gt; (Experiment to use as IgG/Input/Control)
-		-size &lt;#&gt; (Peak size, default: auto)
-		-minDist &lt;#&gt; (minimum distance between peaks, default: peak size x2)
-		-gsize &lt;#&gt; (Set effective mappable genome size, default: 2e9)
-		-fragLength &lt;#|auto&gt; (Approximate fragment length, default: auto)
-		-inputFragLength &lt;#|auto&gt; (Approximate fragment length of input tags, default: auto)
-		-tbp &lt;#&gt; (Maximum tags per bp to count, 0 = no limit, default: auto)
-		-inputtbp &lt;#&gt; (Maximum tags per bp to count in input, 0 = no limit, default: auto)
-		-strand &lt;both|separate&gt; (find peaks using tags on both strands or separate, default:both)
-		-norm # (Tag count to normalize to, default 10000000)
-		-region (extends start/stop coordinates to cover full region considered &quot;enriched&quot;)
-		-center (Centers peaks on maximum tag overlap and calculates focus ratios)
-		-nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
-			(-center and -nfr can be performed later with &quot;getPeakTags&quot;
-
-	Peak Filtering options: (set -F/-L/-C to 0 to skip)
-		-F &lt;#&gt; (fold enrichment over input tag count, default: 4.0)
-		  -P &lt;#&gt; (poisson p-value threshold relative to input tag count, default: 0.0001)
-		-L &lt;#&gt; (fold enrichment over local tag count, default: 4.0)
-		  -LP &lt;#&gt; (poisson p-value threshold relative to local tag count, default: 0.0001)
-		-C &lt;#&gt; (fold enrichment limit of expected unique tag positions, default: 2.0)
-		-localSize &lt;#&gt; (region to check for local tag enrichment, default: 10000)
-		-inputSize &lt;#&gt; (Size of region to search for control tags, default: 2x peak size)
-		-fdr &lt;#&gt; (False discovery rate, default = 0.001)
-		-poisson &lt;#&gt; (Set poisson p-value cutoff, default: uses fdr)
-		-tagThreshold &lt;#&gt; (Set # of tags to define a peak, default: 25)
-		-ntagThreshold &lt;#&gt; (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
-		-minTagThreshold &lt;#&gt; (Absolute minimum tags per peak, default: expected tags per peak)
-
-	GroSeq Options: (Need to specify &quot;-style groseq&quot;):
-		-tssSize &lt;#&gt; (size of region for initiation detection/artifact size, default: 250)
-		-minBodySize &lt;#&gt; (size of region for transcript body detection, default: 1000)
-		-maxBodySize &lt;#&gt; (size of region for transcript body detection, default: 10000)
-		-tssFold &lt;#&gt; (fold enrichment for new initiation detection, default: 4.0)
-		-bodyFold &lt;#&gt; (fold enrichment for new transcript detection, default: 4.0)
-		-endFold &lt;#&gt; (end transcript when levels are this much less than the start, default: 10.0)
-		-fragLength &lt;#&gt; (Approximate fragment length, default: 150)
-		-uniqmap &lt;directory&gt; (directory of binary files specifying uniquely mappable locations)
-			Download from http://biowhat.ucsd.edu/homer/groseq/
-		-confPvalue &lt;#&gt; (confidence p-value: 1.00e-05)
-		-minReadDepth &lt;#&gt; (Minimum initial read depth for transcripts, default: auto)
-		-pseudoCount &lt;#&gt; (Pseudo tag count, default: 2.0)
-		-gtf &lt;filename&gt; (Output de novo transcripts in GTF format)
-			&quot;-o auto&quot; will produce &lt;dir&gt;/transcripts.txt and &lt;dir&gt;/transcripts.gtf
-    </help>
-</tool>
-
--- a/makeTagDirectory.py	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-"""
-
-
-"""
-import re
-import os
-import sys
-import subprocess
-import optparse
-import shutil
-import tempfile
-
-def getFileString(fpath, outpath):
-    """Return "<fpath> <size>" for a file under outpath, with the size shown
-    in B, KB or MB; returns '? ?' when the path is not a regular file."""
-    full_path = os.path.join(outpath, fpath)
-    if not os.path.isfile(full_path):
-        return '? ?'
-    nbytes = float(os.path.getsize(full_path))
-    mega, kilo = 2**20, 2**10
-    if nbytes > mega:
-        label = ' (%1.1f MB)' % (nbytes/mega)
-    elif nbytes > kilo:
-        label = ' (%1.1f KB)' % (nbytes/kilo)
-    elif nbytes > 0:
-        label = ' (%d B)' % (int(nbytes))
-    else:
-        label = ''
-    return '%s %s' % (fpath, label)
-
-class makeTagDirectory():
-    """Thin wrapper that runs HOMER's makeTagDirectory and lists its output.
-
-    opts is the optparse values object (executable, format) and args the
-    positional arguments; args[0] is the output tag directory.
-    """
-
-    def __init__(self,opts=None, args=None):
-        self.opts = opts
-        self.args = args
-
-    def run_makeTagDirectory(self):
-        """
-        makeTagDirectory <Output Directory Name> [options] <alignment file1> [alignment file 2]
-
-        Returns (html_lines, exit_status) once the child process has finished.
-        """
-        # self.args, not the module-level `args` that only exists when run as a script.
-        cl = [self.opts.executable] + self.args
-        if self.opts.format != "bam":
-            cl += ["-format", self.opts.format]
-        print cl
-        p = subprocess.Popen(cl)
-        retval = p.wait()
-        html = self.gen_html(self.args[0])
-        return html,retval
-
-    def gen_html(self, dr=None):
-        """Return HTML lines linking each non-directory entry of dr
-        (default: the working directory at call time, not import time)."""
-        if dr is None:
-            dr = os.getcwd()
-        flist = os.listdir(dr)
-        print flist
-        flist.sort()
-        res = ['<div class="module"><h2>Files created by makeTagDirectory</h2><table cellspacing="2" cellpadding="2">\n']
-        for fn in flist:
-            # Test the entry inside dr; a bare name would be resolved against the cwd.
-            if not os.path.isdir(os.path.join(dr, fn)):
-                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,getFileString(fn, dr)))
-        res.append('</table></div>\n')
-        return res
-
-if __name__ == '__main__':
-    # Positional arguments: <output directory> <alignment file> [...]
-    op = optparse.OptionParser()
-    op.add_option('-e', '--executable', default='makeTagDirectory')
-    op.add_option('-o', '--htmloutput', default=None)
-    op.add_option('-f', '--format', default="sam")
-    opts, args = op.parse_args()
-    if opts.htmloutput is None or not args:
-        op.error('an output directory and -o/--htmloutput are required')
-    wrapper = makeTagDirectory(opts, args)
-    html,retval = wrapper.run_makeTagDirectory()
-    # Write the file listing before reporting failure, as the original did.
-    with open(opts.htmloutput, 'w') as out:
-        out.write(''.join(html))
-    if retval != 0:
-        # `serr` was never defined, so the old failure path raised NameError.
-        sys.stderr.write('makeTagDirectory exited with status %d\n' % retval)
-        sys.exit(retval)
-
-
-
--- a/makeTagDirectory.xml	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-<tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="1.0.1">
-    <requirements>
-        <requirement type="package" version="4.1">homer</requirement>
-    </requirements>
-    <description>Simple wrapper for makeTagDirectory. Used by findPeaks</description>
-    <!--<version_command></version_command>-->
-    <command interpreter="python">makeTagDirectory.py ${tagDir.files_path}
-        #for $alignF in $alignmentFiles
-          $alignF.file -f $alignF.file.ext
-        #end for
-          -o $tagDir
-        2&gt; $out_log || echo "Error running homer_makeTagDirectory." >&amp;2
-
-    </command>
-    <inputs>
-        <param name="title" label="Name for the output tag directory" type="text" value="Homer TagDirectory" />
-        <param type="text" name="options" label="Extra options" value="" help="See below for more options">
-          <sanitizer>
-            <valid initial="string.printable">
-             <remove value="&apos;"/>
-             <remove value="/"/>
-            </valid>
-            <mapping initial="none">
-              <add source="&apos;" target="__sq__"/>
-            </mapping>
-          </sanitizer>
-        </param>
-        <repeat name="alignmentFiles" title="Alignment Files">
-          <param name="file" label="Add file" type="data" format="sam,bed" help="Alignments in SAM or BED format" />
-        </repeat>
-    </inputs>
-    <outputs>
-        <!--<data format="homerTagDirectory" name="tagDir" label="${title} tag directory" />-->
-        <data format="html" name="tagDir" label="${title} tag directory" />
-        <data format="txt" name="out_log" label="${title}.log" />
-        <!--<data format="html" name="html_outfile" label="index" />-->
-        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
-    </outputs>
-
-
-    <tests>
-        <!--<test>-->
-            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
-            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
-        <!--</test>-->
-    </tests>
-
-    <help>
-
-  .. class:: infomark
-
-  **Homer makeTagDirectory**
-
-  For more options, look under: "Command line options"
-
-  http://biowhat.ucsd.edu/homer/ngs/tagDir.html
-
-**Parameter list**
-
-Command line options (not all of them are supported)::
-
-	Usage: makeTagDirectory &lt;directory&gt; &lt;alignment file 1&gt; [file 2] ... [options]
-
-	Creates a platform-independent &apos;tag directory&apos; for later analysis.
-	Currently BED, eland, bowtie, and sam files are accepted. The program will try to
-	automatically detect the alignment format if not specified.  Program will also
-	unzip *.gz, *.bz2, and *.zip files and convert *.bam to sam files on the fly
-	Existing tag directories can be added or combined to make a new one using -d/-t
-	If more than one format is needed and the program cannot auto-detect it properly,
-	make separate tag directories by running the program separately, then combine them.
-	To perform QC/manipulations on an existing tag directory, add &quot;-update&quot;
-
-	Options:
-		-fragLength &lt;# | given&gt; (Set estimated fragment length - given: use read lengths)
-			By default treats the sample as a single read ChIP-Seq experiment
-		-format &lt;X&gt; where X can be: (with column specifications underneath)
-			bed - BED format files:
-				(1:chr,2:start,3:end,4:+/- or read name,5:# tags,6:+/-)
-				-force5th (5th column of BED file contains # of reads mapping to position)
-			sam - SAM formatted files (use samTools to convert BAMs into SAM if you have BAM)
-				-unique (keep if there is a single best alignment based on mapq)
-					-mapq &lt;#&gt; (Minimum mapq for -unique, default: 10, set negative to use AS:i:/XS:i:)
-				-keepOne (keep one of the best alignments even if others exist)
-				-keepAll (include all alignments in SAM file)
-				-mis (Maximum allowed mismatches, default: no limit, uses MD:Z: tag)
-			bowtie - output from bowtie (run with --best -k 2 options)
-				(1:read name,2:+/-,3:chr,4:position,5:seq,6:quality,7:NA,8:misInfo)
-			eland_result - output from basic eland
-				(1:read name,2:seq,3:code,4:#zeroMM,5:#oneMM,6:#twoMM,7:chr,
-							8:position,9:F/R,10-:mismatches
-			eland_export - output from illumina pipeline (22 columns total)
-				(1-5:read name info,9:sequence,10:quality,11:chr,13:position,14:strand)
-			eland_extended - output from illumina pipeline (4 columns total)
-				(1:read name,2:sequence,3:match stats,4:positions[,])
-			mCpGbed - encode style mCpG reporting in extended BED format, no auto-detect
-				(1:chr,2:start,3:end,4:name,5:,6:+/-,7:,8:,9:,10:#C,11:#mC)
-			allC - Lister style output files detailing the read information about all cytosines
-				(1:chr,2:pos,3:strand,4:context,#mC,#totalC,#C
-				-minCounts &lt;#&gt; (minimum number of reads to report mC/C ratios, default: 10)
-				-mCcontext &lt;CG|CHG|CHH|all&gt; (only use C&apos;s in this context, default: CG)
-			HiCsummary - minimal paired-end read mapping information
-				(1:readname,2:chr1,3:5&apos;pos1,4:strand1,5:chr2,6:5&apos;pos2,7:strand2)
-		-force5th (5th column of BED file contains # of reads mapping to position)
-		-d &lt;tag directory&gt; [tag directory 2] ... (add Tag directory to new tag directory)
-		-t &lt;tag file&gt; [tag file 2] ... (add tag file i.e. *.tags.tsv to new tag directory)
-		-single (Create a single tags.tsv file for all &quot;chromosomes&quot; - i.e. if &gt;100 chromosomes)
-		-update (Use current tag directory for QC/processing, do not parse new alignment files)
-		-tbp &lt;#&gt; (Maximum tags per bp, default: no maximum)
-		-precision &lt;1|2|3&gt; (number of decimal places to use for tag totals, default: 1)
-
-		GC-bias options:
-		-genome &lt;genome version&gt; (To see available genomes, use &quot;-genome list&quot;)
-			-or- (for custom genomes):
-		-genome &lt;path-to-FASTA file or directory of FASTA files&gt;
-
-		-checkGC (check Sequence bias, requires &quot;-genome&quot;)
-			-freqStart &lt;#&gt; (offset to start calculating frequency, default: -50)
-			-freqEnd &lt;#&gt; (distance past fragment length to calculate frequency, default: +50)
-			-oligoStart &lt;#&gt; (oligo bias start)
-			-oligoEnd &lt;#&gt; (oligo bias end)
-		-normGC &lt;target GC profile file&gt; (i.e. tagGCcontent.txt file from control experiment)
-			Use &quot;-normGC default&quot; to match the genomic GC distribution
-		-normFixedOligo &lt;oligoFreqFile&gt; (normalize 5&apos; end bias, &quot;-normFixedOligo default&quot; ok)
-		-minNormRatio &lt;#&gt; (Minimum deflation ratio of tag counts, default: 0.25)
-		-maxNormRatio &lt;#&gt; (Maximum inflation ratio of tag counts, default: 2.0)
-		-iterNorm &lt;#&gt; (Sets -max/minNormRatio to 1 and 0, iteratively normalizes such that the
-			resulting distribution is no more than #% different than target, i.e. 0.1,default: off)
-
-	Paired-end/HiC options
-		-illuminaPE (when matching PE reads, assumes last character of read name is 0 or 1)
-		-removePEbg (remove paired end tags within 1.5x fragment length on same chr)
-			-PEbgLength &lt;#&gt; (remove PE reads facing one another within this distance, default: 1.5x fragLen)
-		-restrictionSite &lt;seq&gt; (i.e. AAGCTT for HindIII, assign data &lt; 1.5x fragment length to sites)
-			Must specify genome sequence directory too. (-rsmis &lt;#&gt; to specify mismatches, def: 0)
-			-both, -one, -onlyOne, -none (Keeps reads near restriction sites, default: keep all)
-			-removeSelfLigation (removes reads linking same restriction fragment)
-			-removeRestrictionEnds (removes reads starting on a restriction fragment)
-			-assignMidPoint (will place reads in the middle of HindIII fragments)
-			-restrictionSiteLength &lt;#&gt; (maximum distance from restriction site, default: 1.5x fragLen)
-		-removeSpikes &lt;size bp&gt; &lt;#&gt; (remove tags from regions with &gt; than # times
-			the average tags per size bp, suggest &quot;-removeSpikes 10000 5&quot;)
-
-
-    </help>
-</tool>
-
--- a/pos2bed.xml	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-<tool id="homer_pos2bed" name="homer_pos2bed" version="0.0.3">
-    <requirements>
-        <requirement type="package" version="4.1" >homer</requirement>
-    </requirements>
-    <description></description>
-    <!--<version_command></version_command>-->
-    <command>
-        pos2bed.pl $input_peak 1&gt; $out_bed
-        2&gt; $out_log || echo "Error running pos2bed." >&amp;2
-    </command>
-    <inputs>
-        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
-    </inputs>
-    <outputs>
-        <!--<data format="html" name="html_outfile" label="index" />-->
-        <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
-        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
-        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
-    </outputs>
-    <tests>
-        <test>
-            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
-            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
-        </test>
-    </tests>
-
-    <help>
-        .. class:: infomark
-
-        Converts: homer peak positions -(to)-> BED format
-
-        **Homer pos2bed.pl**
-
-        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
-    </help>
-</tool>
-
--- a/tool_dependencies-disabled.xml	Wed Dec 19 20:20:12 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="homer" version="4.1">
-    <install version="4.1">
-      <actions>
-        <action type="download_by_url">http://biowhat.ucsd.edu/homer/configureHomer.pl</action>
-        <!--<action type="shell_command">perl ./configureHomer.pl -install</action>-->
-        <!--<action type="shell_command">perl ./configureHomer.pl -install hg19</action>-->
-        <action type="move_directory_files">
-          <source_directory>./</source_directory>
-          <destination_directory>$INSTALL_DIR</destination_directory>
-        </action>
-        <action type="set_environment">
-          <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-        </action>
-      </actions>
-    </install>
-    <readme>
-      I'm sorry but this does not work
-
-    </readme>
-  </package>
-</tool_dependency>
-