comparison computeMatrix.xml @ 0:d957e25e18a3 draft

Uploaded
author bgruening
date Thu, 14 Nov 2013 16:39:18 -0500
parents
children c53a73b8eef9
comparison
equal deleted inserted replaced
-1:000000000000 0:d957e25e18a3
1 <tool id="deeptools_computeMatrix" name="computeMatrix" version="1.0">
2 <description>summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile</description>
3 <macros>
4 <import>deepTools_macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <command>
8 #import tempfile
9 ## Build one temporary regions file: every input BED is appended, followed by a hash-prefixed label line closing its group.
10 #set $temp_input_handle = tempfile.NamedTemporaryFile()
11 #set $temp_input_path = $temp_input_handle.name
12 #silent $temp_input_handle.close()
13 ## Only the unique path is kept from the handle; the shell redirections below create the file again.
14 #for $rf in $regionsFiles:
15 cat "$rf.regionsFile" >> $temp_input_path;
16 #if str($rf.label.value).strip():
17 echo "\#$rf.label.value" >> $temp_input_path;
18 #else:
19 echo "\#$rf.regionsFile.name" >> $temp_input_path;
20 #end if
21 #end for
22
23
24 computeMatrix
25
26 $mode.mode_select
27 --regionsFileName '$temp_input_path'
28 --scoreFileName '$scoreFile'
29 --outFileName '$outFileName'
30
31 @THREADS@
32 ## Optional extra outputs, requested only when the matching checkbox is ticked.
33 #if $output.showOutputSettings == "yes":
34 #if $output.saveData:
35 --outFileNameData '$outFileNameData'
36 #end if
37 #if $output.saveMatrix:
38 --outFileNameMatrix '$outFileNameMatrix'
39 #end if
40
41 #if $output.saveSortedRegions:
42 --outFileSortedRegions '$outFileSortedRegions'
43 #end if
44 #end if
45 ## Mode-specific options; the labels are quoted because they may contain spaces (e.g. peak start).
46 #if $mode.mode_select == "reference-point":
47 --referencePoint $mode.referencePoint
48 $mode.nanAfterEnd
49 --beforeRegionStartLength $mode.beforeRegionStartLength
50 --afterRegionStartLength $mode.afterRegionStartLength
51 #else:
52 --regionBodyLength $mode.regionBodyLength
53 --startLabel '$mode.startLabel'
54 --endLabel '$mode.endLabel'
55 #if $mode.regionStartLength.regionStartLength_select == "yes":
56 --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
57 --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
58 #end if
59 #end if
60
61 #if $advancedOpt.showAdvancedOpt == "yes":
62 --sortRegions '$advancedOpt.sortRegions'
63 --sortUsing '$advancedOpt.sortUsing'
64 --averageTypeBins '$advancedOpt.averageTypeBins'
65 $advancedOpt.missingDataAsZero
66 $advancedOpt.skipZeros
67 --binSize $advancedOpt.binSize
68 ## Test for an unset value by its string form instead of truthiness, so that an explicit 0 is still passed on.
69 #if str($advancedOpt.minThreshold).strip() not in ("", "None"):
70 --minThreshold $advancedOpt.minThreshold
71 #end if
72 #if str($advancedOpt.maxThreshold).strip() not in ("", "None"):
73 --maxThreshold $advancedOpt.maxThreshold
74 #end if
75 #if str($advancedOpt.scale).strip() not in ("", "None"):
76 --scale $advancedOpt.scale
77 #end if
78
79 #end if
80 ; rm $temp_input_path
81
82 </command>
83 <inputs>
84 <!-- One or more BED files; each may carry an optional label, otherwise the command falls back to the dataset name. -->
85 <repeat name="regionsFiles" min="1" title="regions to plot">
86 <param name="regionsFile" type="data" format="bed" label="Regions to plot" help="File, in BED format, containing the regions to plot."/>
87 <param name="label" type="text" value="" optional="true" size="30" label="Label" help="Label to use in the output."/>
88 </repeat>
89 <!-- bigWig dataset handed to computeMatrix as the score file. -->
90 <param name="scoreFile" type="data" format="bigwig" label="Score file" help="Should be a bigWig file (containing a score, usually covering the whole genome). You can generate a bigWig file either from a bedGraph or WIG file using UCSC tools or from a BAM file using the deepTool bamCoverage."/>
91 <!-- Mode selector; its value is passed to computeMatrix as the sub-command and decides which options below are shown. -->
92 <conditional name="mode">
93 <param name="mode_select" type="select" label="computeMatrix has two main output options" help="In the scale-regions mode, all regions in the BED file are stretched or shrunk to the same length (bp) that is indicated by the user. Reference-point refers to a position within the BED regions (e.g. start of region). In the reference-point mode only those genomic positions before (upstream) and/or after (downstream) the reference point will be plotted.">
94 <option value="scale-regions" selected="true">scale-regions</option>
95 <option value="reference-point">reference-point</option>
96 </param>
97
98 <when value="scale-regions">
99 <param name="regionBodyLength" type="integer" value="500" label="Distance in bp to which all regions are going to be fitted"/>
100 <param name="startLabel" type="text" value="TSS" size="10" label="Label for the region start" help="Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. &quot;peak start&quot;." />
101 <param name="endLabel" type="text" value="TES" size="10" label="Label for the region end" help="Label shown in the plot for the region end. Default is TES (transcription end site)."/>
102 <conditional name="regionStartLength">
103 <param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions">
104 <option value="no" selected="true">no</option>
105 <option value="yes">yes</option>
106 </param>
107 <when value="no" />
108 <when value="yes">
109 <param name="beforeRegionStartLength" type="integer" value="1000" min="1" optional="true" label="Distance upstream of the start site of the regions defined in the region file" help="If the regions are genes, this would be the distance upstream of the transcription start site."/>
110
111 <param name="afterRegionStartLength" type="integer" value="1000" min="1" optional="true" label="Distance downstream of the end site of the given regions" help="If the regions are genes, this would be the distance downstream of the transcription end site."/>
112 </when>
113 </conditional>
114 </when>
115 <!-- A single-choice select may preselect only one option; TSS is the default reference point. -->
116 <when value="reference-point">
117 <param name="referencePoint" type="select" label="The reference point for the plotting">
118 <option value="TSS" selected="true">region start (TSS)</option>
119 <option value="TES">region end (TES)</option>
120 <option value="center">center of the region</option>
121 </param>
122 <param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue="" label="Discard any values after the region end" help="This is useful to visualize the region end when not using the scale-regions mode and when the reference-point is set to the TSS."/>
123 <param name="beforeRegionStartLength" type="integer" value="1000" min="1" label="Distance upstream of the start site of the regions defined in the region file" help="If the regions are genes, this would be the distance upstream of the transcription start site."/>
124
125 <param name="afterRegionStartLength" type="integer" value="1000" min="1" label="Distance downstream of the end site of the given regions" help="If the regions are genes, this would be the distance downstream of the transcription end site."/>
126 </when>
127 </conditional>
128 <!-- Optional extra outputs; each checkbox adds the matching command-line option and enables the matching filtered dataset. -->
129 <conditional name="output">
130 <param name="showOutputSettings" type="select" label="Show additional output options">
131 <option value="no" selected="true">no</option>
132 <option value="yes">yes</option>
133 </param>
134 <when value="no" />
135 <when value="yes">
136 <param name="saveData" type="boolean" label="Save the averages per matrix column into a text file" help="This corresponds to the underlying data used to plot a summary profile."/>
137 <param name="saveMatrix" type="boolean" label="Save the matrix of values underlying the heatmap" help="This matrix can easily be loaded into R or other programs."/>
138 <param name="saveSortedRegions" type="boolean" label="Save the regions after skipping zeros or min/max threshold values" help="The order of the regions in the file follows the sorting order selected. This is useful, for example, to generate other heatmaps keeping the sorting of the first heatmap."/>
139 </when>
140 </conditional>
141 <!-- Advanced options; none of them reaches the command line unless showAdvancedOpt is set to yes. -->
142 <conditional name="advancedOpt">
143 <param name="showAdvancedOpt" type="select" label="Show advanced options">
144 <option value="no" selected="true">no</option>
145 <option value="yes">yes</option>
146 </param>
147 <when value="no" />
148 <when value="yes">
149 <param name="binSize" type="integer" value="100" min="1" optional="true" label="Length, in base pairs, of the non-overlapping bin for averaging the score over the regions length" />
150 <param name="sortRegions" type="select" label="Sort regions"
151 help="Whether the output file should present the regions sorted.">
152 <option value="no" selected="true">no ordering</option>
153 <option value="descend">descending order</option>
154 <option value="ascend">ascending order</option>
155 </param>
156
157 <param name="sortUsing" type="select" label="Method used for sorting." help="The value is computed for each row." >
158 <option value="mean" selected="true">mean</option>
159 <option value="median">median</option>
160 <option value="min">min</option>
161 <option value="max">max</option>
162 <option value="sum">sum</option>
163 <option value="region_length">region length</option>
164 </param>
165
166 <param name="averageTypeBins" type="select" label="Define the type of statistic that should be displayed." help="The value is computed for each bin.">
167 <option value="mean" selected="true">mean</option>
168 <option value="median">median</option>
169 <option value="min">min</option>
170 <option value="max">max</option>
171 <option value="sum">sum</option>
172 <option value="std">std</option>
173 </param>
174 <!-- The two flags expand to their option string when checked and to nothing otherwise; the three floats are optional. -->
175 <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" label="Indicate missing data as zero" help="Set to &quot;yes&quot;, if missing data should be indicated as zeros. Default is to ignore such cases which will be depicted as black areas in the heatmap. (see &quot;Missing data color&quot; options of the heatmapper for additional options)."/>
176 <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue="" label="Skip zeros" help="Whether regions with only scores of zero should be included or not. Default is to include them."/>
177 <param name="minThreshold" type="float" optional="true" label="Minimum threshold" help="Any region containing a value that is equal or less than this numeric value will be skipped. This is useful to skip, for example, genes where the read count is zero for any of the bins. This could be the result of unmappable areas and can bias the overall results."/>
178 <param name="maxThreshold" type="float" optional="true" label="Maximum threshold" help="Any region containing a value that is equal or higher than this numeric value will be skipped. The max threshold is useful to skip those few regions with very high read counts (e.g. major satellites) that may bias the average values."/>
179 <param name="scale" type="float" optional="true" label="Scale" help="If set, all values are multiplied by this number."/>
180 </when>
181 </conditional>
182
183 </inputs>
184 <outputs> <!-- the matrix is always produced; the other three datasets depend on the output checkboxes -->
185 <data name="outFileName" format="bgzip" label="${tool.name} on ${on_string}: matrix">
186 </data>
187 <data name="outFileNameData" format="tabular" label="${tool.name} on ${on_string}: raw data">
188 <filter>output['showOutputSettings'] == 'yes' and output['saveData'] == True</filter>
189 </data>
190 <data name="outFileNameMatrix" format="tabular" label="${tool.name} on ${on_string}: matrix of values">
191 <filter>output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True</filter>
192 </data>
193 <data name="outFileSortedRegions" format="bed" label="${tool.name} on ${on_string}: sorted/filtered regions">
194 <filter>output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True</filter>
195 </data>
196 </outputs>
197 <!-- Command line the functional test below is meant to mirror:
198 computeMatrix -S test.bw -R test2.bed -a 100 -b 100 -bs 1
199 NOTE(review): the test sets neither mode_select (default scale-regions) nor showAdvancedOpt (default no), so the mode.* and advancedOpt.* values below may never reach the command line; confirm against the Galaxy test framework in use. -->
200 <tests>
201 <test>
202 <param name="regionsFile" value="test2.bed" ftype="bed" />
203 <param name="scoreFile" value="test.bw" ftype="bigwig" />
204 <param name="advancedOpt.binSize" value="1" />
205 <param name="mode.beforeRegionStartLength" value="100" />
206 <param name="mode.afterRegionStartLength" value="100" />
207 <output name="outFileName" file="master.mat.gz" ftype="bgzip" compare="sim_size" delta="100" />
208 </test>
209 </tests>
210 <help>
211
212 **What it does**
213
214 This tool summarizes and prepares an intermediary file
215 containing scores associated with genomic regions that can be used
216 afterwards to plot a heatmap or a profile.
217
218 Genomic regions can really be anything - genes, parts of genes, ChIP-seq
219 peaks, favorite genome regions... as long as you provide a proper file
220 in BED or INTERVAL format. This tool can also be used to filter and sort
221 regions according to their score.
222
223
224 .. image:: $PATH_TO_IMAGES/flowChart_computeMatrixetc.png
225 :alt: Relationship between computeMatrix, heatmapper and profiler
226
227
228 -----
229
230 .. class:: infomark
231
232 @REFERENCES@
233
234 </help>
235 </tool>