Mercurial > repos > nilesh > rseqc
comparison RPKM_saturation.xml @ 51:09846d5169fa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rseqc commit 37fb1988971807c6a072e1afd98eeea02329ee83
author | iuc |
---|---|
date | Tue, 14 Mar 2017 10:23:21 -0400 |
parents | f242ee103277 |
children | 5873cd7afb67 |
comparison
equal
deleted
inserted
replaced
50:f242ee103277 | 51:09846d5169fa |
---|---|
1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="2.4galaxy1"> | 1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="@WRAPPER_VERSION@"> |
2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> | 2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> |
3 | 3 |
4 <macros> | 4 <macros> |
5 <import>rseqc_macros.xml</import> | 5 <import>rseqc_macros.xml</import> |
6 </macros> | 6 </macros> |
7 | 7 |
8 <requirements> | 8 <expand macro="requirements" /> |
9 <expand macro="requirement_package_r" /> | |
10 <expand macro="requirement_package_numpy" /> | |
11 <expand macro="requirement_package_rseqc" /> | |
12 </requirements> | |
13 | 9 |
14 <expand macro="stdio" /> | 10 <expand macro="stdio" /> |
15 | 11 |
16 <version_command><![CDATA[RPKM_saturation.py --version]]></version_command> | 12 <version_command><![CDATA[RPKM_saturation.py --version]]></version_command> |
17 | 13 |
18 <command><![CDATA[ | 14 <command><![CDATA[ |
19 RPKM_saturation.py -i $input -o output -r $refgene | 15 RPKM_saturation.py -i '${input}' -o output -r '${refgene}' |
20 | 16 |
21 #if str($strand_type.strand_specific) == "pair" | 17 #if str($strand_type.strand_specific) == "pair" |
22 -d | 18 -d |
23 #if str($strand_type.pair_type) == "sd" | 19 #if str($strand_type.pair_type) == "sd" |
24 '1++,1--,2+-,2-+' | 20 '1++,1--,2+-,2-+' |
34 #else | 30 #else |
35 '+-,-+' | 31 '+-,-+' |
36 #end if | 32 #end if |
37 #end if | 33 #end if |
38 | 34 |
39 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff | 35 -l ${percentileFloor} -u ${percentileCeiling} -s ${percentileStep} -c ${rpkmCutoff} |
40 ]]> | 36 ]]> |
41 </command> | 37 </command> |
42 | 38 |
43 <inputs> | 39 <inputs> |
44 <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> | 40 <expand macro="bam_param" /> |
45 <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> | 41 <expand macro="refgene_param" /> |
46 <conditional name="strand_type"> | 42 <expand macro="strand_type_param" /> |
47 <param name="strand_specific" type="select" label="Strand-specific?" value="None"> | |
48 <option value="none">None</option> | |
49 <option value="pair">Pair-End RNA-seq</option> | |
50 <option value="single">Single-End RNA-seq</option> | |
51 </param> | |
52 <when value="pair"> | |
53 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd" help="(--strand)"> | |
54 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> | |
55 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> | |
56 </param> | |
57 </when> | |
58 <when value="single"> | |
59 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s" help="(--strand)"> | |
60 <option value="s">positive --> positive; negative --> negative</option> | |
61 <option value="d">positive --> negative; negative --> positive</option> | |
62 </param> | |
63 </when> | |
64 <when value="none"></when> | |
65 </conditional> | |
66 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" help="(--percentile-floor)"/> | 43 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" help="(--percentile-floor)"/> |
67 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" help="(--percentile-ceiling)" /> | 44 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" help="(--percentile-ceiling)" /> |
68 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" help="(--percentile-step)" /> | 45 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" help="(--percentile-step)" /> |
69 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" help="(--rpkm-cutoff)" /> | 46 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" help="(--rpkm-cutoff)" /> |
70 <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquly mapped'" help="(--mapq)" /> | 47 <expand macro="mapq_param" /> |
48 <expand macro="rscript_output_param" /> | |
71 </inputs> | 49 </inputs> |
72 | 50 |
73 <outputs> | 51 <outputs> |
74 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/> | 52 <expand macro="pdf_output_data" filename="output.saturation.pdf" /> |
75 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/> | 53 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM xls)"/> |
76 <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/> | 54 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count xls)"/> |
77 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/> | 55 <expand macro="rscript_output_data" filename="output.saturation.r" /> |
78 </outputs> | 56 </outputs> |
79 | 57 |
80 <tests> | 58 <tests> |
81 <test> | 59 <test> |
82 <param name="input" value="pairend_strandspecific_51mer_hg19_random.bam"/> | 60 <param name="input" value="pairend_strandspecific_51mer_hg19_random.bam"/> |
83 <param name="refgene" value="hg19.HouseKeepingGenes_30.bed"/> | 61 <param name="refgene" value="hg19.HouseKeepingGenes_30.bed"/> |
62 <param name="rscript_output" value="true" /> | |
84 <output name="outputxls"> | 63 <output name="outputxls"> |
85 <assert_contents> | 64 <assert_contents> |
86 <has_n_columns n="26" /> | 65 <has_n_columns n="26" /> |
87 <has_line_matching expression="chr1\t16174358\t16266950\tNM_015001.*" /> | 66 <has_line_matching expression="chr1\t16174358\t16266950\tNM_015001.*" /> |
88 </assert_contents> | 67 </assert_contents> |
97 <assert_contents> | 76 <assert_contents> |
98 <has_text text="pdf('output.saturation.pdf')" /> | 77 <has_text text="pdf('output.saturation.pdf')" /> |
99 <has_line_matching expression="S5=c\(\d+\.\d+\)" /> | 78 <has_line_matching expression="S5=c\(\d+\.\d+\)" /> |
100 </assert_contents> | 79 </assert_contents> |
101 </output> | 80 </output> |
81 <output name="outputpdf" file="output.saturation.pdf" compare="sim_size" /> | |
102 </test> | 82 </test> |
103 </tests> | 83 </tests> |
104 | 84 |
105 <help><![CDATA[ | 85 <help><![CDATA[ |
106 RPKM_saturation.py | 86 RPKM_saturation.py |
118 In the output figure, Y axis is "Percent Relative Error" or "Percent Error" which is used | 98 In the output figure, Y axis is "Percent Relative Error" or "Percent Error" which is used |
119 to measure how the RPKM estimated from a subset of reads (i.e. RPKMobs) deviates from real | 99 to measure how the RPKM estimated from a subset of reads (i.e. RPKMobs) deviates from real |
120 expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a | 100 expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a |
121 proxy, we use the RPKM estimated from total reads to approximate RPKMreal. | 101 proxy, we use the RPKM estimated from total reads to approximate RPKMreal. |
122 | 102 |
123 .. image:: http://rseqc.sourceforge.net/_images/RelativeError.png | 103 .. image:: $PATH_TO_IMAGES/RelativeError.png |
124 :height: 80 px | 104 :height: 80 px |
125 :width: 400 px | 105 :width: 400 px |
126 :scale: 100 % | 106 :scale: 100 % |
127 | 107 |
128 Inputs | 108 Inputs |
152 1. output.eRPKM.xls: RPKM values for each transcript | 132 1. output.eRPKM.xls: RPKM values for each transcript |
153 2. output.rawCount.xls: Raw count for each transcript | 133 2. output.rawCount.xls: Raw count for each transcript |
154 3. output.saturation.r: R script to generate plot | 134 3. output.saturation.r: R script to generate plot |
155 4. output.saturation.pdf: | 135 4. output.saturation.pdf: |
156 | 136 |
157 .. image:: http://rseqc.sourceforge.net/_images/saturation.png | 137 .. image:: $PATH_TO_IMAGES/saturation.png |
158 :height: 600 px | 138 :height: 600 px |
159 :width: 600 px | 139 :width: 600 px |
160 :scale: 80 % | 140 :scale: 80 % |
161 | 141 |
162 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: | 142 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: |
171 x <- seq(5,100,5) #resampling percentage (5,10,15,...,100) | 151 x <- seq(5,100,5) #resampling percentage (5,10,15,...,100) |
172 rpkm <- c(32.95,35.43,35.15,36.04,36.41,37.76,38.96,38.62,37.81,38.14,37.97,38.58,38.59,38.54,38.67, 38.67,38.87,38.68, 38.42, 38.23) #Paste RPKM values calculated from each subsets | 152 rpkm <- c(32.95,35.43,35.15,36.04,36.41,37.76,38.96,38.62,37.81,38.14,37.97,38.58,38.59,38.54,38.67, 38.67,38.87,38.68, 38.42, 38.23) #Paste RPKM values calculated from each subsets |
173 scatter.smooth(x,100*abs(rpkm-rpkm[length(rpkm)])/(rpkm[length(rpkm)]),type="p",ylab="Precent Relative Error",xlab="Resampling Percentage") | 153 scatter.smooth(x,100*abs(rpkm-rpkm[length(rpkm)])/(rpkm[length(rpkm)]),type="p",ylab="Precent Relative Error",xlab="Resampling Percentage") |
174 dev.off() #close graphical device | 154 dev.off() #close graphical device |
175 | 155 |
176 .. image:: http://rseqc.sourceforge.net/_images/saturation_eg.png | 156 .. image:: $PATH_TO_IMAGES/saturation_eg.png |
177 :height: 600 px | 157 :height: 600 px |
178 :width: 600 px | 158 :width: 600 px |
179 :scale: 80 % | 159 :scale: 80 % |
180 | 160 |
181 ----- | 161 @ABOUT@ |
182 | 162 |
183 About RSeQC | |
184 +++++++++++ | |
185 | |
186 The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. | |
187 | |
188 The RSeQC package is licensed under the GNU GPL v3 license. | |
189 | |
190 .. image:: http://rseqc.sourceforge.net/_static/logo.png | |
191 | |
192 .. _RSeQC: http://rseqc.sourceforge.net/ | |
193 ]]> | 163 ]]> |
194 </help> | 164 </help> |
195 | 165 |
196 <expand macro="citations" /> | 166 <expand macro="citations" /> |
197 | 167 |