comparison RPKM_saturation.xml @ 32:580ee0c4bc4e

Fixes from Bjorn Gruning: create symlinks under $TMP and clean them up afterwards, replace R dependency with the Tool Shed R3 package, add --install-scripts, prepend tool-ids with rseqc
author lparsons
date Mon, 07 Oct 2013 15:01:13 -0400
parents cc5eaa9376d8
children
comparison
equal deleted inserted replaced
31:cc5eaa9376d8 32:580ee0c4bc4e
1 <tool id="RPKM_saturation" name="RPKM Saturation" version="1.1"> 1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="1.1">
2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> 2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.11.0">R</requirement> 4 <requirement type="package" version="3.0.1">R</requirement>
5 <requirement type="package" version="1.7.1">numpy</requirement> 5 <requirement type="package" version="1.7.1">numpy</requirement>
6 <requirement type="package" version="2.3.7">rseqc</requirement> 6 <requirement type="package" version="2.3.7">rseqc</requirement>
7 </requirements> 7 </requirements>
8 <command> RPKM_saturation.py -i $input -o output -r $refgene 8 <command> RPKM_saturation.py -i $input -o output -r $refgene
9 9
10 #if str($strand_type.strand_specific) == "pair" 10 #if str($strand_type.strand_specific) == "pair"
11 -d 11 -d
12 #if str($strand_type.pair_type) == "sd" 12 #if str($strand_type.pair_type) == "sd"
13 '1++,1--,2+-,2-+' 13 '1++,1--,2+-,2-+'
14 #else 14 #else
15 '1+-,1-+,2++,2--' 15 '1+-,1-+,2++,2--'
16 #end if 16 #end if
17 #end if 17 #end if
18 18
19 #if str($strand_type.strand_specific) == "single" 19 #if str($strand_type.strand_specific) == "single"
20 -d 20 -d
21 #if str($strand_type.single_type) == "s" 21 #if str($strand_type.single_type) == "s"
22 '++,--' 22 '++,--'
23 #else 23 #else
24 '+-,-+' 24 '+-,-+'
25 #end if 25 #end if
26 #end if 26 #end if
27 27
28 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff 28 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff
29 29
30 </command> 30 </command>
31 <inputs>
32 <param name="input" type="data" format="bam" label="input bam/sam file" />
33 <param name="refgene" type="data" format="bed" label="Reference gene model" />
34 <conditional name="strand_type">
35 <param name="strand_specific" type="select" label="Strand-specific?" value="None">
36 <option value="none">None</option>
37 <option value="pair">Pair-End RNA-seq</option>
38 <option value="single">Single-End RNA-seq</option>
39 </param>
40 <when value="pair">
41 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd">
42 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
43 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
44 </param>
45 </when>
46 <when value="single">
47 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s">
48 <option value="s">positive --> positive; negative --> negative</option>
49 <option value="d">positive --> negative; negative --> positive</option>
50 </param>
51 </when>
52 <when value="none"></when>
53 </conditional>
54 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" />
55 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" />
56 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" />
57 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" />
58 </inputs>
59 <outputs>
60 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/>
61 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/>
62 <data format="r" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/>
63 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/>
64 </outputs>
65 <stdio> 31 <stdio>
66 <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> 32 <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
67 <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> 33 <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
68 </stdio> 34 </stdio>
69 <help> 35 <inputs>
36 <param name="input" type="data" format="bam" label="input bam/sam file" />
37 <param name="refgene" type="data" format="bed" label="Reference gene model" />
38 <conditional name="strand_type">
39 <param name="strand_specific" type="select" label="Strand-specific?" value="None">
40 <option value="none">None</option>
41 <option value="pair">Pair-End RNA-seq</option>
42 <option value="single">Single-End RNA-seq</option>
43 </param>
44 <when value="pair">
45 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd">
46 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
47 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
48 </param>
49 </when>
50 <when value="single">
51 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s">
52 <option value="s">positive --> positive; negative --> negative</option>
53 <option value="d">positive --> negative; negative --> positive</option>
54 </param>
55 </when>
56 <when value="none"></when>
57 </conditional>
58 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" />
59 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" />
60 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" />
61 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" />
62 </inputs>
63 <outputs>
64 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/>
65 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/>
66 <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/>
67 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/>
68 </outputs>
69 <help>
70 RPKM_saturation.py 70 RPKM_saturation.py
71 ++++++++++++++++++ 71 ++++++++++++++++++
72 72
73 The precision of any sample statistic (RPKM) is affected by sample size (sequencing depth); 73 The precision of any sample statistic (RPKM) is affected by sample size (sequencing depth);
74 \'resampling\' or \'jackknifing\' is a method to estimate the precision of sample statistics by 74 \'resampling\' or \'jackknifing\' is a method to estimate the precision of sample statistics by
91 91
92 Inputs 92 Inputs
93 ++++++++++++++ 93 ++++++++++++++
94 94
95 Input BAM/SAM file 95 Input BAM/SAM file
96 Alignment file in BAM/SAM format. 96 Alignment file in BAM/SAM format.
97 97
98 Reference gene model 98 Reference gene model
99 Gene model in BED format. 99 Gene model in BED format.
100 100
101 Strand sequencing type (default=none) 101 Strand sequencing type (default=none)
102 See Infer Experiment tool if uncertain. 102 See Infer Experiment tool if uncertain.
103 103
104 Options 104 Options
105 ++++++++++++++ 105 ++++++++++++++
106 106
107 Skip Multiple Hit Reads 107 Skip Multiple Hit Reads
108 Use Multiple hit reads or use only uniquely mapped reads. 108 Use Multiple hit reads or use only uniquely mapped reads.
109 109
110 Only use exonic reads 110 Only use exonic reads
111 Makes the program use only exonic (UTR exons and CDS exons) reads; otherwise all reads are used. 111 Makes the program use only exonic (UTR exons and CDS exons) reads; otherwise all reads are used.
112 112
113 Output 113 Output
114 ++++++++++++++ 114 ++++++++++++++
115 115
116 1. output.eRPKM.xls: RPKM values for each transcript 116 1. output.eRPKM.xls: RPKM values for each transcript
122 :height: 600 px 122 :height: 600 px
123 :width: 600 px 123 :width: 600 px
124 :scale: 80 % 124 :scale: 80 %
125 125
126 - All transcripts are sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: 126 - All transcripts are sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups:
127 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile. 127 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile.
128 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile. 128 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile.
129 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile. 129 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile.
130 4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile. 130 4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile.
131 - A BAM/SAM file containing more than 100 million alignments will make the module very slow. 131 - A BAM/SAM file containing more than 100 million alignments will make the module very slow.
132 - Follow example below to visualize a particular transcript (using R console):: 132 - Follow example below to visualize a particular transcript (using R console)::
133 133
134 pdf("xxx.pdf") #starts the graphics device driver for producing PDF graphics 134 pdf("xxx.pdf") #starts the graphics device driver for producing PDF graphics
135 x &lt;- seq(5,100,5) #resampling percentage (5,10,15,...,100) 135 x &lt;- seq(5,100,5) #resampling percentage (5,10,15,...,100)
154 .. image:: http://rseqc.sourceforge.net/_static/logo.png 154 .. image:: http://rseqc.sourceforge.net/_static/logo.png
155 155
156 .. _RSeQC: http://rseqc.sourceforge.net/ 156 .. _RSeQC: http://rseqc.sourceforge.net/
157 157
158 158
159 </help> 159 </help>
160 </tool> 160 </tool>