shrnaseq: hairpinTool.xml comparison

comparison hairpinTool.xml @ 2:076ca575208f

First commit

author	shian_su <registertonysu@gmail.com>
date	Fri, 21 Feb 2014 12:52:56 +1100
parents
children	3d04308a99f9

comparison

equal deleted inserted replaced

-:aa02cf19e1b3
+:076ca575208f
+<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.5">
+<description>
+Analyse hairpin differential representation using edgeR
+</description>
+<!-- R packages declared as requirements of this tool. -->
+<requirements>
+<requirement type="R-module">edgeR</requirement>
+<requirement type="R-module">limma</requirement>
+</requirements>
+<!-- Exit codes in the range "1:" are reported as a fatal "Tool exception". -->
+<stdio>
+<exit_code range="1:" level="fatal" description="Tool exception" />
+</stdio>
+<!--
+Positional arguments handed to hairpinTool.R, in the order emitted below:
+  1. input type: "fastq" or "counts"
+  fastq input:  one "fastq::FILE" argument per entry of the FastQ repeat,
+                then hairpin annotation, sample annotation, barcode start,
+                barcode end, hairpin start, hairpin end
+                (1, 5, 37, 57 when positions are not specified)
+  counts input: counts table, hairpin annotation, sample annotation (quoted),
+                then "0 0 0", so both branches emit six arguments here
+  then: minimum CPM and minimum samples (-Inf and -Inf when filtering is off),
+        FDR threshold, LogFC threshold, analysis mode, output file,
+        output files path
+  classic mode: pair1, pair2 (both quoted)
+  glm mode:     contrast (quoted), roast option, then either
+                hairpinReq, selection method, selection (quoted)
+                or three 0 placeholders when gene level analysis is off
+The argument order is positional; keep it in step with hairpinTool.R.
+-->
+<command interpreter="Rscript">
+hairpinTool.R $inputOpt.type
+#if $inputOpt.type=="fastq":
+#for $i, $fas in enumerate($inputOpt.fastq):
+fastq::$fas.file
+#end for
+$inputOpt.hairpin
+$inputOpt.samples
+#if $inputOpt.positions.option=="yes":
+$inputOpt.positions.barstart
+$inputOpt.positions.barend
+$inputOpt.positions.hpstart
+$inputOpt.positions.hpend
+#else:
+1
+5
+37
+57
+#end if
+#else:
+$inputOpt.counts
+$inputOpt.anno
+"$inputOpt.factors"
+0 0 0
+#end if
+#if $filterCPM.option=="yes":
+$filterCPM.cpmReq
+$filterCPM.sampleReq
+#else:
+-Inf
+-Inf
+#end if
+$fdr
+$lfc
+$workMode.mode
+$outFile
+$outFile.files_path
+#if $workMode.mode=="classic":
+"$workMode.pair1"
+"$workMode.pair2"
+#else:
+"$workMode.contrast"
+$workMode.roast.option
+#if $workMode.roast.option=="yes":
+$workMode.roast.hairpinReq
+$workMode.roast.select.option
+"$workMode.roast.select.selection"
+#else:
+0
+0
+0
+#end if
+#end if
+</command>
+<inputs>
+<!-- Input source: raw FastQ reads with annotation tables, or an existing
+     table of counts with annotation tables. -->
+<conditional name="inputOpt">
+<param name="type" type="select" label="Input File Type">
+<option value="fastq">FastQ File</option>
+<option value="counts">Table of Counts</option>
+</param>
+<when value="fastq">
+<param name="hairpin" type="data" format="tabular"
+label="Hairpin Annotation"/>
+<param name="samples" type="data" format="tabular"
+label="Sample Annotation"/>
+<!-- Each repeat entry contributes one "fastq::FILE" argument to the command. -->
+<repeat name="fastq" title="FastQ Files">
+<param name="file" type="data" format="fastq"/>
+</repeat>
+<!-- Optional override of the read positions; when "no" the command
+     passes the defaults 1, 5, 37 and 57. -->
+<conditional name="positions">
+<param name="option" type="select"
+label="Specify Barcode and Hairpin Locations?"
+help="Default Positions: Barcode: 1 to 5, Hairpin: 37 to 57.">
+<option value="no" selected="True">No</option>
+<option value="yes">Yes</option>
+</param>
+<when value="yes">
+<param name="barstart" type="integer" value="1"
+label="Barcode Starting Position"/>
+<param name="barend" type="integer" value="5"
+label="Barcode Ending Position"/>
+<param name="hpstart" type="integer" value="37"
+label="Hairpin Starting Position"/>
+<param name="hpend" type="integer" value="57"
+label="Hairpin Ending Position"/>
+</when>
+<when value="no"/>
+</conditional>
+</when>
+<!-- Counts input: the "factors" dataset is the sample annotation table. -->
+<when value="counts">
+<param name="counts" type="data" format="tabular" label="Counts Table"/>
+<param name="anno" type="data" format="tabular"
+label="Hairpin Annotation"/>
+<param name="factors" type="data" format="tabular"
+label="Sample Annotation"/>
+</when>
+</conditional>
+<!-- Optional low-representation filter. When "no" is chosen the command
+     passes -Inf for both thresholds. -->
+<conditional name="filterCPM">
+<param name="option" type="select" label="Filter Low CPM?"
+help="Ignore hairpins with very low representation when performing
+analysis.">
+<option value="yes">Yes</option>
+<option value="no">No</option>
+</param>
+<when value="yes">
+<!-- No upper bound: counts per million is not restricted to the 0 to 1
+     range, so thresholds above 1 must be accepted. -->
+<param name="cpmReq" type="float" value="0.5" min="0"
+label="Minimum CPM"/>
+<param name="sampleReq" type="integer" value="1" min="0"
+label="Minimum Samples"
+help="Filter out all the hairpins that do not meet the minimum
+CPM in at least this many samples."/>
+</when>
+<when value="no"/>
+</conditional>
+<!-- Analysis type. "classic" compares two groups (pair1 against pair2);
+     "glm" takes explicit contrasts and can optionally run a gene level
+     analysis with a choice of how the genes to plot are selected. -->
+<conditional name="workMode">
+<param name="mode" type="select" label="Analysis Type"
+help="Classic Exact Tests are useful for simple comparisons across
+two sampling groups. Generalised linear models allow for more
+complex contrasts and gene level analysis to be made.">
+<option value="classic">Classic Exact Test</option>
+<option value="glm">Generalised Linear Model</option>
+</param>
+<when value="classic">
+<param name="pair1" type="text" label="Compare" size="40"/>
+<param name="pair2" type="text" label="To" size="40"
+help="The analysis will subtract values of this group from those
+in the group above to establish the difference."/>
+</when>
+<when value="glm">
+<param name="contrast" type="text" size="60"
+label="Contrasts of interest"
+help="Specify equations defining contrasts to be made. Eg.
+KD-Control will result in positive fold change if KD has
+greater expression and negative if Control has greater
+expression."/>
+<conditional name="roast">
+<param name="option" type="select"
+label="Perform Gene Level Analysis?"
+help="Analyse LogFC tendencies for hairpins belonging
+to the same gene.">
+<option value="no">No</option>
+<option value="yes">Yes</option>
+</param>
+<when value="yes">
+<param name="hairpinReq" type="integer" value="2" min="2"
+label="Minimum Hairpins"
+help="Only genes with at least this many hairpins will
+be analysed."/>
+<!-- Both branches expose a text parameter named "selection" so the command
+     can read the same variable whichever method is chosen. -->
+<conditional name="select">
+<param name="option" type="select"
+label="Gene Selection Method">
+<option value="rank">By p-value Rank</option>
+<option value="geneID">By Gene Identifier</option>
+</param>
+<when value="rank">
+<param name="selection" type="text" size="40" value="1:5"
+label="Ranks of Top Genes to Plot"
+help="Genes are ranked in ascending p-value for
+differential representation, individual ranks can
+be entered separated by comma or a range separated
+by colon."/>
+</when>
+<when value="geneID">
+<!-- The 'Gene' column is part of the hairpin annotation table. -->
+<param name="selection" type="text" size="80" value=""
+label="Symbols of Genes to Plot"
+help="Select genes based on their identifier in the
+'Gene' column of the hairpin annotation file.
+Please ensure exact match with the values in input
+file and separate selections with commas."/>
+</when>
+</conditional>
+</when>
+<when value="no"/>
+</conditional>
+</when>
+</conditional>
+<!-- Highlighting thresholds for the smear plot: an observation must be below
+     the FDR threshold and above the absolute LogFC threshold. -->
+<param name="fdr" type="float" value="0.05" min="0" max="1"
+label="FDR Threshold"
+help="All observations below this threshold will be highlighted
+in the smear plot."/>
+<param name="lfc" type="float" value="0" min="0"
+label="Absolute LogFC Threshold"
+help="In addition to meeting the FDR requirement, the absolute
+value of the log-fold-change of the observation must be above
+this threshold to be highlighted."/>
+</inputs>
+<!-- Single HTML output; the command passes both $outFile and
+     $outFile.files_path to the script. -->
+<outputs>
+<data format="html" name="outFile" label="shRNAseq Analysis"/>
+</outputs>
+<help>
+.. class:: infomark
+**What it does**
+Given tables containing information about the hairpins and their associated
+barcodes, information about the samples, and a fastq file containing the hairpin
+reads, this tool will generate plots and tables for the analysis of differential
+representation.
+-----
+.. class:: infomark
+**INPUTS**
+**Input File Type:**
+This tool can either generate counts from a raw FastQ file, given the
+information regarding the samples and hairpins, or use a table of counts that
+has already been generated.
+**Counts Table (Counts Input):**
+A tab delimited text table of information regarding the counts of hairpins.
+Should have a column 'ID' to denote the hairpins that counts correspond to. Each
+additional column should have titles corresponding to the label for the sample.
+Example::
+ID  Sample1 Sample2 Sample3
+Control1 49802 48014 40148
+Control2 12441 16352 14232
+Control3 9842  9148  9111
+Hairpin1 3300  3418  2914
+Hairpin2 91418 95812 93174
+Hairpin3 32985 31975 35104
+Hairpin4 12082 14081 14981
+Hairpin5 2491  2769  2691
+Hairpin6 1294  1486  1642
+Hairpin7 49501 49076 47611
+...
+**Hairpin Annotation:**
+A tab delimited text table of information regarding the hairpins. Should have
+columns 'ID', 'Sequences' and 'Gene' to uniquely identify the hairpin, align it
+with the reads to produce counts and identify which gene the hairpin acts on.
+NOTE: the column names are case sensitive and should be input exactly as they
+are shown here.
+Example::
+ID	Sequences	Gene
+Control1	TCTCGCTTGGGCGAGAGTAAG	2
+Control2	CCGCCTGAAGTCTCTGATTAA	2
+Control3	AGGAATTATAATGCTTATCTA	2
+Hairpin1	AAGGCAGAGACTGACCACCTA	4
+Hairpin2	GAGCGACCTGGTGTTACTCTA	4
+Hairpin3	ATGGTGTAAATAGAGCTGTTA	4
+Hairpin4	CAGCTCATCTTCTGTGAAGAA	4
+Hairpin5	CAGCTCTGTGGGTCAGAAGAA	4
+Hairpin6	CCAGGCACAGATCTCAAGATA	4
+Hairpin7	ATGACAAGAAAGACATCTCAA	7
+...
+**Sample Annotation (FastQ Input):**
+A tab delimited text table of information regarding the samples. Should have
+columns 'ID', 'Sequences' and 'group' to uniquely identify each sample, identify
+the sample in the reads by its barcode sequence and correctly group replicates
+for analysis. Additional columns may be inserted for annotation purposes and will
+not interfere with analysis as long as the necessary columns are present.
+NOTE: the column names are case sensitive and should be input exactly as they
+are shown here.
+Example::
+ID	Sequences	group	Replicate
+3	GAAAG	Day 2	1
+6	GAACC	Day 10	1
+9	GAAGA	Day 5 GFP neg	1
+16	GAATT	Day 5 GFP pos	1
+18	GACAC	Day 2	2
+21	GACCA	Day 10	2
+28	GACGT	Day 5 GFP neg	2
+31	GACTG	Day 5 GFP pos	2
+33	GAGAA	Day 2	3
+40	GAGCT	Day 10	3
+...
+**Specify Barcode and Hairpin Locations (FastQ Input):**
+It is assumed that in the sequencing reads the first 5 bases are the
+barcodes and that bases 37-57 are the hairpins. If this is not the case then the
+values of the positions can be changed, however it still requires the barcodes
+and hairpins to be in a consistent location and in a continuous sequence.
+**Filter Low CPM?:**
+Often in a large screen there may be members with very low counts which are of no
+interest in the experiment; these may be filtered out to speed up computations.
+Filtering will be based on counts per million in a required number of samples.
+**Analysis Type:**
+* **Classic Exact Test:** This allows two experimental groups to be compared and
+p-values for differential representation derived for each hairpin. Simple and
+fast for straightforward comparisons. In this option you will have the option of
+"*Compare* x *To* y" which implicitly subtracts the data from y from that of x
+to produce the comparison.
+* **Generalised Linear Model:** This allows for complex contrasts to be specified
+and also gene level analysis to be performed. If this option is chosen then
+contrasts must be explicitly stated in equations and multiple contrasts can be
+made. In addition there will be the option to analyse hairpins on a per-gene
+basis to see if hairpins belonging to a particular gene have any overall
+tendencies for the direction of their log-fold-change.
+**FDR Threshold:**
+The smear plot in the output will have hairpins highlighted to signify
+significant differential representation. The significance is determined by
+controlling the false discovery rate, only those with an FDR lower than the
+threshold will be highlighted in the plot.
+-----
+**Citations:**
+.. class:: infomark
+limma_
+Please cite the paper below for the limma software itself.  Please also try
+to cite the appropriate methodology articles that describe the statistical
+methods implemented in limma, depending on which limma functions you are
+using.  The methodology articles are listed in Section 2.1 of the limma
+User's Guide.
+	* Smyth, GK (2005). Limma: linear models for microarray data. In:
+	  'Bioinformatics and Computational Biology Solutions using R and
+	  Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R. Irizarry,
+	  W. Huber (eds), Springer, New York, pages 397-420.
+.. class:: infomark
+edgeR_
+Please cite the first paper for the software itself and the other papers for
+the various original statistical methods implemented in edgeR.  See
+Section 1.2 in the User's Guide for more detail.
+	* Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor
+	  package for differential expression analysis of digital gene expression
+	  data. Bioinformatics 26, 139-140
+	* Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing
+	  differences in tag abundance. Bioinformatics 23, 2881-2887
+	* Robinson MD and Smyth GK (2008). Small-sample estimation of negative
+	  binomial dispersion, with applications to SAGE data.
+	  Biostatistics, 9, 321-332
+	* McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis
+	  of multifactor RNA-Seq experiments with respect to biological variation.
+	  Nucleic Acids Research 40, 4288-4297
+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
+.. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
+</help>
+</tool>

Mercurial > repos > shians > shrnaseq

comparison hairpinTool.xml @ 2:076ca575208f