comparison makeTagDirectory.xml @ 16:687df269e597 draft

Uploaded
author kevyin
date Wed, 19 Dec 2012 17:28:55 -0500
parents
children
comparison
equal deleted inserted replaced
15:529485c1dda1 16:687df269e597
1 <tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="1.0.1">
2 <requirements>
<!-- Tool dependency: the "homer" package, pinned to version 4.1. -->
3 <requirement version="4.1" type="package">homer</requirement>
4 </requirements>
5 <description>Simple wrapper for makeTagDirectory. Used by findPeaks</description>
<!-- Version command is currently disabled. -->
6 <!--<version_command></version_command>-->
<!-- Runs the makeTagDirectory.py wrapper with the Python interpreter.
     Arguments, in order: the files_path of the tagDir output; then, for every
     entry of the alignmentFiles repeat, the dataset followed by "-f" and that
     dataset's extension; then "-o" and the tagDir dataset itself.
     stderr of the wrapper is redirected into the out_log dataset; if the
     wrapper exits with a failure status, a short error message is echoed to
     stderr.
     NOTE(review): the "options" input declared in this file is not referenced
     in this template, so its value appears to be unused; confirm whether that
     is intended. -->
7 <command interpreter="python">makeTagDirectory.py ${tagDir.files_path}
8 #for $alignF in $alignmentFiles
9 $alignF.file -f $alignF.file.ext
10 #end for
11 -o $tagDir
12 2&gt; $out_log || echo "Error running homer_makeTagDirectory." >&amp;2
13
14 </command>
15 <inputs>
<!-- Free-text name interpolated into both output labels as ${title}.
     The initial value of a Galaxy text param is taken from "value"; the
     previous "default" attribute is not read for this purpose, which left the
     field (and therefore the output labels) empty by default. -->
16 <param name="title" label="Name for the output tag directory" type="text" value="Homer TagDirectory" />
<!-- Extra command-line options as free text.
     NOTE(review): this parameter is not referenced by the command template in
     this file, so whatever is entered here appears to be ignored; confirm. -->
17 <param type="text" name="options" label="Extra options" value="" help="See below for more options">
18 <sanitizer>
<!-- Start from all printable characters, then disallow the single quote and the slash. -->
19 <valid initial="string.printable">
20 <remove value="&apos;"/>
21 <remove value="/"/>
22 </valid>
<!-- Start from an empty mapping and rewrite a single quote to the token __sq__. -->
23 <mapping initial="none">
24 <add source="&apos;" target="__sq__"/>
25 </mapping>
26 </sanitizer>
27 </param>
<!-- Repeatable list of alignment datasets (SAM or BED); the command template iterates over it. -->
28 <repeat name="alignmentFiles" title="Alignment Files">
29 <param name="file" label="Add file" type="data" format="sam,bed" help="Alignments in SAM or BED format" />
30 </repeat>
31 </inputs>
32 <outputs>
<!-- Disabled variant that used a dedicated homerTagDirectory datatype. -->
33 <!--<data format="homerTagDirectory" name="tagDir" label="${title} tag directory" />-->
<!-- HTML dataset standing in for the tag directory: the command template hands
     its files_path and, after "-o", the dataset itself to the wrapper. -->
34 <data name="tagDir" format="html" label="${title} tag directory" />
<!-- Text dataset that receives the wrapper's stderr stream. -->
35 <data name="out_log" format="txt" label="${title}.log" />
<!-- Disabled alternatives for a separate HTML index output. -->
36 <!--<data format="html" name="html_outfile" label="index" />-->
37 <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
38 </outputs>
39
40
41 <tests>
<!-- No active tests.
     NOTE(review): the disabled example below refers to "input_file" and
     "html_file", which do not match the inputs (title, options,
     alignmentFiles) or outputs (tagDir, out_log) declared in this file; it
     would need rewriting before it can be enabled. -->
42 <!--<test>-->
43 <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
44 <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
45 <!--</test>-->
46 </tests>
47
48 <help>
49
50 .. class:: infomark
51
52 **Homer makeTagDirectory**
53
54 For more options, look under: "Command line options"
55
56 http://biowhat.ucsd.edu/homer/ngs/tagDir.html
57
58 **Parameter list**
59
60 Command line options (not all of them are supported)::
61
62 Usage: makeTagDirectory &lt;directory&gt; &lt;alignment file 1&gt; [file 2] ... [options]
63
64 Creates a platform-independent &apos;tag directory&apos; for later analysis.
65 Currently BED, eland, bowtie, and sam files are accepted. The program will try to
66 automatically detect the alignment format if not specified. Program will also
67 unzip *.gz, *.bz2, and *.zip files and convert *.bam to sam files on the fly
68 Existing tag directories can be added or combined to make a new one using -d/-t
69 If more than one format is needed and the program cannot auto-detect it properly,
70 make separate tag directories by running the program separately, then combine them.
71 To perform QC/manipulations on an existing tag directory, add &quot;-update&quot;
72
73 Options:
74 -fragLength &lt;# | given&gt; (Set estimated fragment length - given: use read lengths)
75 By default treats the sample as a single read ChIP-Seq experiment
76 -format &lt;X&gt; where X can be: (with column specifications underneath)
77 bed - BED format files:
78 (1:chr,2:start,3:end,4:+/- or read name,5:# tags,6:+/-)
79 -force5th (5th column of BED file contains # of reads mapping to position)
80 sam - SAM formatted files (use samTools to convert BAMs into SAM if you have BAM)
81 -unique (keep if there is a single best alignment based on mapq)
82 -mapq &lt;#&gt; (Minimum mapq for -unique, default: 10, set negative to use AS:i:/XS:i:)
83 -keepOne (keep one of the best alignments even if others exist)
84 -keepAll (include all alignments in SAM file)
85 -mis (Maximum allowed mismatches, default: no limit, uses MD:Z: tag)
86 bowtie - output from bowtie (run with --best -k 2 options)
87 (1:read name,2:+/-,3:chr,4:position,5:seq,6:quality,7:NA,8:misInfo)
88 eland_result - output from basic eland
89 (1:read name,2:seq,3:code,4:#zeroMM,5:#oneMM,6:#twoMM,7:chr,
90 8:position,9:F/R,10-:mismatches)
91 eland_export - output from illumina pipeline (22 columns total)
92 (1-5:read name info,9:sequence,10:quality,11:chr,13:position,14:strand)
93 eland_extended - output from illumina pipeline (4 columns total)
94 (1:read name,2:sequence,3:match stats,4:positions[,])
95 mCpGbed - encode style mCpG reporting in extended BED format, no auto-detect
96 (1:chr,2:start,3:end,4:name,5:,6:+/-,7:,8:,9:,10:#C,11:#mC)
97 allC - Lister style output files detailing the read information about all cytosines
98 (1:chr,2:pos,3:strand,4:context,#mC,#totalC,#C)
99 -minCounts &lt;#&gt; (minimum number of reads to report mC/C ratios, default: 10)
100 -mCcontext &lt;CG|CHG|CHH|all&gt; (only use C&apos;s in this context, default: CG)
101 HiCsummary - minimal paired-end read mapping information
102 (1:readname,2:chr1,3:5&apos;pos1,4:strand1,5:chr2,6:5&apos;pos2,7:strand2)
103 -force5th (5th column of BED file contains # of reads mapping to position)
104 -d &lt;tag directory&gt; [tag directory 2] ... (add Tag directory to new tag directory)
105 -t &lt;tag file&gt; [tag file 2] ... (add tag file i.e. *.tags.tsv to new tag directory)
106 -single (Create a single tags.tsv file for all &quot;chromosomes&quot; - i.e. if &gt;100 chromosomes)
107 -update (Use current tag directory for QC/processing, do not parse new alignment files)
108 -tbp &lt;#&gt; (Maximum tags per bp, default: no maximum)
109 -precision &lt;1|2|3&gt; (number of decimal places to use for tag totals, default: 1)
110
111 GC-bias options:
112 -genome &lt;genome version&gt; (To see available genomes, use &quot;-genome list&quot;)
113 -or- (for custom genomes):
114 -genome &lt;path-to-FASTA file or directory of FASTA files&gt;
115
116 -checkGC (check Sequence bias, requires &quot;-genome&quot;)
117 -freqStart &lt;#&gt; (offset to start calculating frequency, default: -50)
118 -freqEnd &lt;#&gt; (distance past fragment length to calculate frequency, default: +50)
119 -oligoStart &lt;#&gt; (oligo bias start)
120 -oligoEnd &lt;#&gt; (oligo bias end)
121 -normGC &lt;target GC profile file&gt; (i.e. tagGCcontent.txt file from control experiment)
122 Use &quot;-normGC default&quot; to match the genomic GC distribution
123 -normFixedOligo &lt;oligoFreqFile&gt; (normalize 5&apos; end bias, &quot;-normFixedOligo default&quot; ok)
124 -minNormRatio &lt;#&gt; (Minimum deflation ratio of tag counts, default: 0.25)
125 -maxNormRatio &lt;#&gt; (Maximum inflation ratio of tag counts, default: 2.0)
126 -iterNorm &lt;#&gt; (Sets -max/minNormRatio to 1 and 0, iteratively normalizes such that the
127 resulting distribution is no more than #% different than target, i.e. 0.1,default: off)
128
129 Paired-end/HiC options
130 -illuminaPE (when matching PE reads, assumes last character of read name is 0 or 1)
131 -removePEbg (remove paired end tags within 1.5x fragment length on same chr)
132 -PEbgLength &lt;#&gt; (remove PE reads facing one another within this distance, default: 1.5x fragLen)
133 -restrictionSite &lt;seq&gt; (i.e. AAGCTT for HindIII, assign data &lt; 1.5x fragment length to sites)
134 Must specify genome sequence directory too. (-rsmis &lt;#&gt; to specify mismatches, def: 0)
135 -both, -one, -onlyOne, -none (Keeps reads near restriction sites, default: keep all)
136 -removeSelfLigation (removes reads linking same restriction fragment)
137 -removeRestrictionEnds (removes reads starting on a restriction fragment)
138 -assignMidPoint (will place reads in the middle of HindIII fragments)
139 -restrictionSiteLength &lt;#&gt; (maximum distance from restriction site, default: 1.5x fragLen)
140 -removeSpikes &lt;size bp&gt; &lt;#&gt; (remove tags from regions with &gt; than # times
141 the average tags per size bp, suggest &quot;-removeSpikes 10000 5&quot;)
142
143
144 </help>
145 </tool>
146