comparison hairpinTool.xml @ 13:7aaa9bc23e3c

Added support for paired end reads - Changed terminology to generalise to sgRNA CRISPR experiments. - Added option to include second factor for statistical power - Added option to filter out samples with low counts - Added support for paired end reads - Added option to highlight only positive or negative fold change in smear plot - Fixed bug that caused tool to stop if more than enough sample annotations were supplied
author shian_su <registertonysu@gmail.com>
date Tue, 14 Oct 2014 17:05:07 +1100
parents c0a76e30d61b
children 44130e484a97
comparison
equal deleted inserted replaced
12:ebb4cb1e8e35 13:7aaa9bc23e3c
1 <tool id="shRNAseq" name="shRNAseq Tool" version="1.0.13"> 1 <tool id="shRNAseq" name="shRNAseq Tool" version="1.2.0">
2 <description> 2 <description>
3 Analyse hairpin differential representation using edgeR 3 Analyse differential representation for shRNAseq and sgRNA based procedures
4 using edgeR package from Bioconductor.
4 </description> 5 </description>
5 6
6 <requirements> 7 <requirements>
7 <requirement type="R-module" version="3.6.2">edgeR</requirement> 8 <requirement type="R-module" version="3.7.17">edgeR</requirement>
8 <requirement type="R-module" version="3.20.7">limma</requirement> 9 <requirement type="R-module" version="3.21.16">limma</requirement>
9 <requirement type="package" version="3.0.3">R_3_0_3</requirement> 10 <requirement type="package" version="3.1.1">R_3_0_3</requirement>
10 </requirements> 11 </requirements>
11 12
12 <stdio> 13 <stdio>
13 <exit_code range="1:" level="fatal" description="Tool exception" /> 14 <exit_code range="1:" level="fatal" description="Tool exception" />
14 </stdio> 15 </stdio>
15 16
16 <command interpreter="Rscript"> 17 <command interpreter="Rscript">
17 hairpinTool.R $inputOpt.inputType 18 ampliconTool.R $inputOpt.inputType
18 #if $inputOpt.inputType=="fastq": 19 #if $inputOpt.inputType=="fastq":
20
19 #for $i, $fas in enumerate($inputOpt.fastq): 21 #for $i, $fas in enumerate($inputOpt.fastq):
20 fastq::$fas.file 22 fastq::$fas.file
23 #end for
24
25 $inputOpt.hairpin
26 $inputOpt.samples
27
28 #if $inputOpt.positions.posOption=="yes":
29 $inputOpt.positions.barstart
30 $inputOpt.positions.barend
31 0
32 0
33 $inputOpt.positions.hpstart
34 $inputOpt.positions.hpend
35 #else:
36 1
37 5
38 0
39 0
40 37
41 57
42 #end if
43 #elif $inputOpt.inputType=="pairedFastq":
44
45 #for $i, $fas in enumerate($inputOpt.fastq):
46 fastq::$fas.file
47 #end for
48
49 #for $i, $fas in enumerate($inputOpt.fastq):
50 fastqRev::$fas.fileRev
21 #end for 51 #end for
22 52
23 $inputOpt.hairpin 53 $inputOpt.hairpin
24 $inputOpt.samples 54 $inputOpt.samples
25 55
26 #if $inputOpt.positions.posOption=="yes": 56 #if $inputOpt.positions.posOption=="yes":
27 $inputOpt.positions.barstart 57 $inputOpt.positions.barstart
28 $inputOpt.positions.barend 58 $inputOpt.positions.barend
59 $inputOpt.positions.barstartRev
60 $inputOpt.positions.barendRev
29 $inputOpt.positions.hpstart 61 $inputOpt.positions.hpstart
30 $inputOpt.positions.hpend 62 $inputOpt.positions.hpend
31 #else: 63 #else:
32 1 64 1
33 5 65 5
66 0
67 0
34 37 68 37
35 57 69 57
36 #end if 70 #end if
37 #else: 71
72 #elif $inputOpt.inputType=="counts":
38 $inputOpt.counts 73 $inputOpt.counts
39 $inputOpt.hairpin 74 $inputOpt.hairpin
40 $inputOpt.samples 75 $inputOpt.samples
41 0 0 0 76 0
77 0
78 0
79 0
80 0
42 #end if 81 #end if
43 82
83 #if $inputOpt.secondaryFactor.secFactorOpt=="yes":
84 $inputOpt.secondaryFactor.secFactName
85 #else:
86 "none"
87 #end if
88
44 #if $filterCPM.filtOption=="yes": 89 #if $filterCPM.filtOption=="yes":
45 $filterCPM.cpmReq 90 $filterCPM.cpmReq
46 $filterCPM.sampleReq 91 $filterCPM.sampleReq
92 $filterCPM.readReq
47 #else: 93 #else:
48 -Inf 94 -Inf
49 -Inf 95 -Inf
96 -Inf
50 #end if 97 #end if
51 98
52 $fdr 99 $fdr
53 $lfc 100 $lfc
101 $direction
54 $workMode.mode 102 $workMode.mode
55 $outFile 103 $outFile
56 $outFile.files_path 104 $outFile.files_path
57 105
58 #if $workMode.mode=="classic": 106 #if $workMode.mode=="classic":
59 "$workMode.pair1" 107 "$workMode.pair1"
60 "$workMode.pair2" 108 "$workMode.pair2"
61 #elif $workMode.mode=="glm": 109 #elif $workMode.mode=="glm":
62 "$workMode.contrast" 110 "$workMode.contrast"
63 $workMode.roast.roastOption 111 $workMode.roast.roastOption
112
64 #if $workMode.roast.roastOption=="yes": 113 #if $workMode.roast.roastOption=="yes":
65 $workMode.roast.hairpinReq 114 $workMode.roast.hairpinReq
66 $workMode.roast.select.selOption 115 $workMode.roast.select.selOption
67 "$workMode.roast.select.selection" 116 "$workMode.roast.select.selection"
68 #else: 117 #else:
69 0 118 0
70 0 119 0
71 0 120 0
72 #end if 121 #end if
122
73 #end if 123 #end if
74 </command> 124 </command>
75 125
76 <inputs> 126 <inputs>
77 <conditional name="inputOpt"> 127 <conditional name="inputOpt">
128
78 <param name="inputType" type="select" label="Input File Type"> 129 <param name="inputType" type="select" label="Input File Type">
79 <option value="fastq">FastQ File</option> 130 <option value="fastq">FastQ File</option>
131 <option value="pairedFastq">Paired FastQ File</option>
80 <option value="counts">Table of Counts</option> 132 <option value="counts">Table of Counts</option>
81 </param> 133 </param>
82 134
83 <when value="fastq"> 135 <when value="fastq">
84 <param name="hairpin" type="data" format="tabular" 136 <param name="hairpin" type="data" format="tabular"
85 label="Hairpin Annotation"/> 137 label="Target Annotation"/>
86
87 138
88 <param name="samples" type="data" format="tabular" 139 <param name="samples" type="data" format="tabular"
89 label="Sample Annotation"/> 140 label="Sample Annotation"/>
90 141
91 <repeat name="fastq" title="FastQ Files"> 142 <repeat name="fastq" title="FastQ Files">
92 <param name="file" type="data" format="fastq"/> 143 <param name="file" type="data" format="fastq"/>
93 </repeat> 144 </repeat>
94 145
146 <conditional name="secondaryFactor">
147
148 <param name="secFactorOpt" type="select"
149 label="Include Secondary Factor">
150
151 <option value="no" selected="True">No</option>
152
153 <option value="yes">Yes</option>
154
155 </param>
156
157 <when value="yes">
158
159 <param name="secFactName" type="text" label="Secondary Factor Name"
160 size="80"/>
161
162 </when>
163
164 <when value="no">
165 </when>
166 </conditional>
167
95 <conditional name="positions"> 168 <conditional name="positions">
96 <param name="posOption" type="select" 169 <param name="posOption" type="select"
97 label="Specify Barcode and Hairpin Locations?" 170 label="Specify Sample Index and Target Sequence Locations?"
98 help="Default Positions: Barcode: 1 to 5, Hairpin: 37 to 57."> 171 help="Default Positions: Index: 1 to 5, Target: 37 to 57.">
99 <option value="no" selected="True">No</option> 172 <option value="no" selected="True">No</option>
100 <option value="yes">Yes</option> 173 <option value="yes">Yes</option>
101 </param> 174 </param>
102 175
103 <when value="yes"> 176 <when value="yes">
104 <param name="barstart" type="integer" value="1" 177 <param name="barstart" type="integer" value="1"
105 label="Barcode Starting Position"/> 178 label="Index Starting Position"/>
106 <param name="barend" type="integer" value="5" 179 <param name="barend" type="integer" value="5"
107 label="Barcode Ending Position"/> 180 label="Index Ending Position"/>
108 181
109 <param name="hpstart" type="integer" value="37" 182 <param name="hpstart" type="integer" value="37"
110 label="Hairpin Starting Position"/> 183 label="Target Starting Position"/>
111 184
112 <param name="hpend" type="integer" value="57" 185 <param name="hpend" type="integer" value="57"
113 label="Hairpin Ending Position"/> 186 label="Target Ending Position"/>
114 </when> 187 </when>
115 188
116 <when value="no"/> 189 <when value="no"/>
117 </conditional> 190 </conditional>
118 </when> 191 </when>
119 192
193 <when value="pairedFastq">
194 <param name="hairpin" type="data" format="tabular"
195 label="Target Sequence Annotation"/>
196
197 <param name="samples" type="data" format="tabular"
198 label="Sample Annotation"/>
199
200 <repeat name="fastq" title="FastQ Files">
201 <param name="file" type="data" format="fastq"/>
202 <param name="fileRev" type="data" format="fastq"/>
203 </repeat>
204
205 <conditional name="secondaryFactor">
206
207 <param name="secFactorOpt" type="select"
208 label="Include Secondary Factor">
209
210 <option value="no" selected="True">No</option>
211
212 <option value="yes">Yes</option>
213
214 </param>
215
216 <when value="yes">
217
218 <param name="secFactName" type="text" label="Secondary Factor Name"
219 size="80"/>
220
221 </when>
222
223 <when value="no">
224 </when>
225 </conditional>
226
227 <conditional name="positions">
228
229 <param name="posOption" type="select"
230 label="Specify Sample Index and Target Sequence Locations?"
231 help="Default Positions: Index: 1 to 5, Input required for
232 reverse end, Target: 37 to 57.">
233
234 <option value="no" selected="True">No</option>
235
236 <option value="yes">Yes</option>
237
238 </param>
239
240 <when value="yes">
241 <param name="barstart" type="integer" value="1"
242 label="Index Starting Position"/>
243
244 <param name="barend" type="integer" value="5"
245 label="Index Ending Position"/>
246
247 <param name="barstartRev" type="integer" value="0"
248 label="Reverse Index Starting Position"/>
249
250 <param name="barendRev" type="integer" value="0"
251 label="Reverse Index Ending Position"/>
252
253 <param name="hpstart" type="integer" value="37"
254 label="Target Starting Position"/>
255
256 <param name="hpend" type="integer" value="57"
257 label="Target Ending Position"/>
258 </when>
259
260 <when value="no">
261 </when>
262
263 </conditional>
264
265 </when>
266
120 <when value="counts"> 267 <when value="counts">
268
121 <param name="counts" type="data" format="tabular" label="Counts Table"/> 269 <param name="counts" type="data" format="tabular" label="Counts Table"/>
270
122 <param name="hairpin" type="data" format="tabular" 271 <param name="hairpin" type="data" format="tabular"
123 label="Hairpin Annotation"/> 272 label="Target Sequence Annotation"/>
273
124 <param name="samples" type="data" format="tabular" 274 <param name="samples" type="data" format="tabular"
125 label="Sample Annotation"/> 275 label="Sample Annotation"/>
276
277 <conditional name="secondaryFactor">
278
279 <param name="secFactorOpt" type="select"
280 label="Include Secondary Factor">
281
282 <option value="no" selected="True">No</option>
283
284 <option value="yes">Yes</option>
285
286 </param>
287
288 <when value="yes">
289
290 <param name="secFactName" type="text" label="Secondary Factor Name"
291 size="80"/>
292
293 </when>
294
295 <when value="no">
296 </when>
297
298 </conditional>
299
126 </when> 300 </when>
301
127 </conditional> 302 </conditional>
128 303
129 <conditional name="filterCPM"> 304 <conditional name="filterCPM">
130 <param name="filtOption" type="select" label="Filter Low CPM?" 305 <param name="filtOption" type="select" label="Filter Low CPM?"
131 help="Ignore hairpins with very low representation when performing 306 help="Ignore target sequences with very low representation when
132 analysis."> 307 performing analysis.">
133 <option value="yes">Yes</option> 308 <option value="yes">Yes</option>
134 <option value="no">No</option> 309 <option value="no">No</option>
135 </param> 310 </param>
136 311
137 <when value="yes"> 312 <when value="yes">
138 <param name="cpmReq" type="float" value="0.5" min="0" 313 <param name="cpmReq" type="float" value="0.5" min="0"
139 label="Minimum CPM"/> 314 label="Minimum CPM"/>
140 315
141 <param name="sampleReq" type="integer" value="1" min="0" 316 <param name="sampleReq" type="integer" value="1" min="0"
142 label="Minimum Samples" 317 label="Minimum Samples"
143 help="Filter out all the genes that do not meet the minimum 318 help="Filter out all the genes that do not meet the minimum
144 CPM in at least this many samples."/> 319 CPM in at least this many samples."/>
320
321 <param name="readReq" type="integer" value="1000" min="0"
322 label="Minimum Reads"
323 help="Filter out all samples that do not have the minimum
324 number of reads."/>
325
145 </when> 326 </when>
146 327
147 <when value="no"/> 328 <when value="no"/>
148 329
149 </conditional> 330 </conditional>
173 expression."/> 354 expression."/>
174 355
175 <conditional name="roast"> 356 <conditional name="roast">
176 <param name="roastOption" type="select" 357 <param name="roastOption" type="select"
177 label="Perform Gene Level Analysis?" 358 label="Perform Gene Level Analysis?"
178 help="Analyse LogFC tendencies for hairpins belonging 359 help="Analyse LogFC tendencies for target sequences belonging
179 to the same gene."> 360 to the same gene. NOTE: this is a slow procedure that
361 scales badly with the number of genes analysed.">
180 <option value="no">No</option> 362 <option value="no">No</option>
181 <option value="yes">Yes</option> 363 <option value="yes">Yes</option>
182 </param> 364 </param>
183 365
184 <when value="yes"> 366 <when value="yes">
185 <param name="hairpinReq" type="integer" value="2" min="2" 367 <param name="hairpinReq" type="integer" value="2" min="2"
186 label="Minimum Hairpins" 368 label="Minimum Targets Found"
187 help="Only genes with at least this many hairpins will 369 help="Only genes with at least this many target sequences
188 be analysed."/> 370 found will be analysed."/>
189 371
190 <conditional name="select"> 372 <conditional name="select">
191 <param name="selOption" type="select" 373 <param name="selOption" type="select"
192 label="Gene Selection Method"> 374 label="Gene Selection Method">
193 <option value="rank">By p-value Rank</option> 375 <option value="rank">By p-value Rank</option>
221 403
222 <param name="fdr" type="float" value="0.05" min="0" max="1" 404 <param name="fdr" type="float" value="0.05" min="0" max="1"
223 label="FDR Threshold" 405 label="FDR Threshold"
224 help="All observations below this threshold will be highlighted 406 help="All observations below this threshold will be highlighted
225 in the smear plot."/> 407 in the smear plot."/>
408
226 <param name="lfc" type="float" value="0" min="0" 409 <param name="lfc" type="float" value="0" min="0"
227 label="Absolute LogFC Threshold" 410 label="Absolute LogFC Threshold"
228 help="In addition to meeting the FDR requirement, the absolute 411 help="In addition to meeting the FDR requirement, the absolute
229 value of the log-fold-change of the observation must be above 412 value of the log-fold-change of the observation must be above
230 this threshold to be highlighted."/> 413 this threshold to be highlighted."/>
414
415 <param name="direction" type="select" label="Highlight Option"
416 help="Only highlight positive or negative fold changes in smear plot?">
417 <option value="all">Default</option>
418 <option value="up">Positive Only</option>
419 <option value="down">Negative Only</option>
420 </param>
231 </inputs> 421 </inputs>
232 422
233 <outputs> 423 <outputs>
234 <data format="html" name="outFile" label="shRNAseq Analysis"/> 424 <data format="html" name="outFile" label="TagSeq Analysis"/>
235 </outputs> 425 </outputs>
236 <help> 426 <help>
237 .. class:: infomark 427 .. class:: infomark
238 428
239 **What it does** 429 **What it does**
240 430
241 Given tables containing information about the hairpins and their associated 431 Given tables containing information about the hairpins/sgRNA and their
242 barcodes, information about the samples and fastq file containing the hairpin 432 associated sample indices, information about the samples and fastq file
243 reads. This tool will generate plots and tables for the analysis of differential 433 containing the sequencing reads. This tool will generate plots and tables for
244 representation. 434 the analysis of differential representation.
245 435
246 .. class:: infomark 436 .. class:: infomark
247 437
248 A tutorial of how to use this tool is available at: 438 A tutorial of how to use this tool is available at:
249 http://bioinf.wehi.edu.au/shRNAseq/galaxy.html 439 http://bioinf.wehi.edu.au/shRNAseq/galaxy.html
255 **INPUTS** 445 **INPUTS**
256 446
257 **Input File Type:** 447 **Input File Type:**
258 448
259 This tool is able to either generate counts from a raw FastQ file given the 449 This tool is able to either generate counts from a raw FastQ file given the
260 information regarding the samples and hairpins. Alternatively if a table of 450 information regarding the samples and hairpins/sgRNA. Alternatively if a table
261 counts has already been generated it can also be used. 451 of counts has already been generated it can also be used.
262 452
263 **Counts Table (Counts Input):** 453 **Counts Table (Counts Input):**
264 454
265 A tab delimited text table of information regarding the counts of hairpins. 455 A tab delimited text table of information regarding the counts of
266 Should have a column 'ID' to denote the hairpins that counts correspond to. Each 456 hairpins/sgRNA. Should have a column 'ID' to denote the hairpins/sgRNA that
267 additional column should have titles corresponding to the label for the sample. 457 counts correspond to. Each additional column should have titles corresponding to
458 the label for the sample.
268 459
269 Example:: 460 Example::
270 461
271 ID Sample1 Sample2 Sample3 462 ID Sample1 Sample2 Sample3
272 Control1 49802 48014 40148 463 Control1 49802 48014 40148
279 Hairpin5 2491 2769 2691 470 Hairpin5 2491 2769 2691
280 Hairpin6 1294 1486 1642 471 Hairpin6 1294 1486 1642
281 Hairpin7 49501 49076 47611 472 Hairpin7 49501 49076 47611
282 ... 473 ...
283 474
284 **Hairpin Annotation:** 475 **Target Sequence Annotation:**
285 476
286 A tab delimited text table of information regarding the hairpins. Should have 477 A tab delimited text table of information regarding the targeted
287 columns 'ID', 'Sequences' and 'Gene' to uniquely identify the hairpin, align it 478 hairpins/sgRNA sequence. Should have columns 'ID', 'Sequences' and 'Gene' to
288 with the reads to produce counts and identify which gene the hairpin acts on. 479 uniquely identify the target, align it with the reads to produce counts and
480 identify which gene the target acts on.
289 481
290 NOTE: the column names are case sensitive and should be input exactly as they 482 NOTE: the column names are case sensitive and should be input exactly as they
291 are shown here. 483 are shown here.
292 484
293 Example:: 485 Example::
294 486
295 ID Sequences Gene 487 ID Sequences Gene
296 Control1 TCTCGCTTGGGCGAGAGTAAG 2 488 Control1 TCTCGCTTGGGCGAGAGTAAG 2
297 Control2 CCGCCTGAAGTCTCTGATTAA 2 489 Control2 CCGCCTGAAGTCTCTGATTAA 2
298 Control3 AGGAATTATAATGCTTATCTA 2 490 Control3 AGGAATTATAATGCTTATCTA 2
299 Hairpin1 AAGGCAGAGACTGACCACCTA 4 491 Hairpin1 AAGGCAGAGACTGACCACCTA 4
300 Hairpin2 GAGCGACCTGGTGTTACTCTA 4 492 Hairpin2 GAGCGACCTGGTGTTACTCTA 4
301 Hairpin3 ATGGTGTAAATAGAGCTGTTA 4 493 Hairpin3 ATGGTGTAAATAGAGCTGTTA 4
302 Hairpin4 CAGCTCATCTTCTGTGAAGAA 4 494 Hairpin4 CAGCTCATCTTCTGTGAAGAA 4
303 Hairpin5 CAGCTCTGTGGGTCAGAAGAA 4 495 Hairpin5 CAGCTCTGTGGGTCAGAAGAA 4
304 Hairpin6 CCAGGCACAGATCTCAAGATA 4 496 Hairpin6 CCAGGCACAGATCTCAAGATA 4
305 Hairpin7 ATGACAAGAAAGACATCTCAA 7 497 Hairpin7 ATGACAAGAAAGACATCTCAA 7
306 ... 498 ...
307 499
308 **Sample Annotation (FastQ Input):** 500 **Sample Annotation (FastQ Input):**
309 501
310 A tab delimited text table of information regarding the samples. Should have 502 A tab delimited text table of information regarding the samples. Should have
311 columns 'ID', 'Sequences' and 'group' to uniquely identify each sample, identify 503 columns 'ID', 'Sequences' and 'group' to uniquely identify each sample, identify
312 the sample in the reads by its barcode sequence and correctly group replicates 504 the sample in the reads by its sample index sequence and correctly group
313 for analysis. Additional columns may be inserted for annotation purposes and will 505 replicates for analysis. Additional columns may be inserted for annotation purposes
314 not interfere with analysis as long as the necessary columns are present. 506 and will not interfere with analysis as long as the necessary columns are
315 507 present.
316 NOTE: the column names are case sensitive and should be input exactly as they 508
317 are shown here. 509 NOTE: With the exception of other_group, column names are case sensitive and
510 should be input exactly as they are shown here. The other_group column can be
511 named by the user and specified in the "Include Secondary Factor" option of the
512 tool.
318 513
319 Example:: 514 Example::
320 515
321 ID Sequences group Replicate 516 ID Sequences group other_group Replicate
322 3 GAAAG Day 2 1 517 3 GAAAG Day 2 male 1
323 6 GAACC Day 10 1 518 6 GAACC Day 10 female 1
324 9 GAAGA Day 5 GFP neg 1 519 9 GAAGA Day 5 GFP neg male 1
325 16 GAATT Day 5 GFP pos 1 520 16 GAATT Day 5 GFP pos male 1
326 18 GACAC Day 2 2 521 18 GACAC Day 2 female 2
327 21 GACCA Day 10 2 522 21 GACCA Day 10 male 2
328 28 GACGT Day 5 GFP neg 2 523 28 GACGT Day 5 GFP neg male 2
329 31 GACTG Day 5 GFP pos 2 524 31 GACTG Day 5 GFP pos female 2
330 33 GAGAA Day 2 3 525 33 GAGAA Day 2 male 3
331 40 GAGCT Day 10 3 526 40 GAGCT Day 10 female 3
332 ... 527 ...
333 528
334 **Specify Barcode and Hairpin Locations (FastQ Input):** 529 **Include Secondary Factor**
530 If there are two factors involved in the experiment (e.g. Age and Gender) then
531 the secondary factor should be included to improve the statistical analysis.
532 The secondary factor should be specified as a column in the sample annotation
533 file and the corresponding column name should be input exactly as it is into
534 the provided field in the tool.
535
536 NOTE: Currently the secondary factor is used only to improve statistical
537 analysis, comparisons can only be made in the primary factor specified as
538 "group" in the sample annotation.
539
540 **Specify Sample Index and Target Sequence Locations (FastQ Input):**
335 541
336 It is assumed that in the sequencing reads that the first 5 bases are the 542 It is assumed that in the sequencing reads that the first 5 bases are the
337 barcodes and that bases 37-57 are the hairpins. If this is not the case then the 543 sample index sequence and that bases 37-57 are the hairpins/sgRNA. If this is
338 values of the positions can be changed, however it still requires the barcodes 544 not the case then the values of the positions can be changed, however it still
339 and hairpins to be in a consistent location and in a continuous sequence. 545 requires the sample indices and hairpins/sgRNA to be in a consistent location and
546 in a continuous sequence.
547
548 NOTE: position values start at 1 for the first base.
340 549
341 **Filter Low CPM?:** 550 **Filter Low CPM?:**
342 551
343 Often in a large screen there may be members with very low counts which are of no 552 Often in a large screen there may be members with very low counts which are of no
344 interest in the experiment, these may be filtered out to speed up computations. 553 interest in the experiment, these may be filtered out to speed up computations.
345 Filtering will be based on counts per million in a required number of samples. 554 Filtering will be based on counts per million in a required number of samples.
346 555
347 **Analysis Type:** 556 **Analysis Type:**
348 557
349 * **Classic Exact Test:** This allows two experimental groups to be compared and 558 * **Classic Exact Test:** This allows two experimental groups to be compared
350 p-values for differential representation derived for each hairpin. Simple and 559 and p-values for differential representation derived for each target
351 fast for straightforward comparisons. In this option you will have the option of 560 sequence. Simple and fast for straightforward comparisons. In this option you
352 "*Compare* x *To* y" which implicitly subtracts the data from y from that of x 561 will have the option of "*Compare* x *To* y" which implicitly subtracts the
353 to produce the comparison. 562 data from y from that of x to produce the comparison.
354 563
355 * **Generalised Linear Model:** This allows for complex contrasts to be specified 564 * **Generalised Linear Model:** This allows for complex contrasts to be specified
356 and also gene level analysis to be performed. If this option is chosen then 565 and also gene level analysis to be performed. If this option is chosen then
357 contrasts must be explicitly stated in equations and multiple contrasts can be 566 contrasts must be explicitly stated in equations and multiple contrasts can
358 made. In addition there will be the option to analyse hairpins on a per-gene 567 be made. In addition there will be the option to analyse hairpins/sgRNA on a
359 basis to see if hairpins belonging to a particular gene have any overall 568 per-gene basis to see if hairpins/sgRNA belonging to a particular gene have
360 tendencies for the direction of their log-fold-change. 569 any overall tendencies for the direction of their log-fold-change.
361 570
362 **FDR Threshold:** 571 **FDR Threshold:**
363 The smear plot in the output will have hairpins highlighted to signify 572 The smear plot in the output will have hairpins/sgRNA highlighted to signify
364 significant differential representation. The significance is determined by 573 significant differential representation. The significance is determined by
365 controlling the false discovery rate, only those with a FDR lower than the 574 controlling the false discovery rate, only those with a FDR lower than the
366 threshold will be highlighted in the plot. 575 threshold will be highlighted in the plot.
367 576
368 ----- 577 -----
377 to cite the appropriate methodology articles that describe the statistical 586 to cite the appropriate methodology articles that describe the statistical
378 methods implemented in limma, depending on which limma functions you are 587 methods implemented in limma, depending on which limma functions you are
379 using. The methodology articles are listed in Section 2.1 of the limma 588 using. The methodology articles are listed in Section 2.1 of the limma
380 User's Guide. 589 User's Guide.
381 590
382 * Smyth, GK (2005). Limma: linear models for microarray data. In: 591 * Smyth, GK (2005). Limma: linear models for microarray data. In:
383 'Bioinformatics and Computational Biology Solutions using R and 592 'Bioinformatics and Computational Biology Solutions using R and
384 Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry, 593 Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry,
385 W. Huber (eds), Springer, New York, pages 397-420. 594 W. Huber (eds), Springer, New York, pages 397-420.
386 595
387 .. class:: infomark 596 .. class:: infomark
388 597
389 edgeR 598 edgeR
390 599
391 Please cite the first paper for the software itself and the other papers for 600 Please cite the first paper for the software itself and the other papers for
392 the various original statistical methods implemented in edgeR. See 601 the various original statistical methods implemented in edgeR. See
393 Section 1.2 in the User's Guide for more detail. 602 Section 1.2 in the User's Guide for more detail.
394 603
395 * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor 604 * Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor
396 package for differential expression analysis of digital gene expression 605 package for differential expression analysis of digital gene expression
397 data. Bioinformatics 26, 139-140 606 data. Bioinformatics 26, 139-140
398 607
399 * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing 608 * Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing
400 differences in tag abundance. Bioinformatics 23, 2881-2887 609 differences in tag abundance. Bioinformatics 23, 2881-2887
401 610
402 * Robinson MD and Smyth GK (2008). Small-sample estimation of negative 611 * Robinson MD and Smyth GK (2008). Small-sample estimation of negative
403 binomial dispersion, with applications to SAGE data. 612 binomial dispersion, with applications to SAGE data.
404 Biostatistics, 9, 321-332 613 Biostatistics, 9, 321-332
405 614
406 * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis 615 * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis
407 of multifactor RNA-Seq experiments with respect to biological variation. 616 of multifactor RNA-Seq experiments with respect to biological variation.
408 Nucleic Acids Research 40, 4288-4297 617 Nucleic Acids Research 40, 4288-4297
409 618
410 Report problems to: su.s@wehi.edu.au 619 Report problems to: su.s@wehi.edu.au
411 620
412 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html 621 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
413 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html 622 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
414 </help> 623 </help>
415 </tool> 624 </tool>
416