<tool name="CollectRnaSeqMetrics" id="picard_CollectRnaSeqMetrics" version="1.126.0">
    <!--
        Galaxy wrapper around the CollectRnaSeqMetrics program of picard.jar.
        The command stages the reference FASTA, converts a UCSC table export into
        refFlat column order, and then runs Picard, producing a chart PDF and a
        tabular metrics summary.
    -->
    <description>collect metrics about the alignment of RNA to various functional classes of loci in the genome</description>
    <requirements>
        <requirement type="package" version="1.126.0">picard</requirement>
    </requirements>

    <!-- Supplies the @java_options@, @dataset_collections@, @description@ and @more_info@ tokens and the "VS" macro used below -->
    <macros>
        <import>picard_macros.xml</import>
    </macros>

    <command><![CDATA[
        ## Set up input files

        ## Reference sequences
        ## "localref.fa" is only used as a symlink name when the reference comes from the history;
        ## for a cached reference the variable is overwritten with the path from the data table.

        #set $reference_fasta_filename = "localref.fa"

        #if str( $reference_source.reference_source_selector ) == "history":
            ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
        #else:
            #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
        #end if

        ## refFlat data
        ## The awk line below converts a file obtained from UCSC as specified in the tool help to refFlat format:
        ## comment lines are dropped and the 11th column is moved to the front.

        grep -v '^#' "${refFlat}" | awk '{print $11"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10}' > refFlat.tab &&

        ## Start picard command

        @java_options@
        java -jar \$JAVA_JAR_PATH/picard.jar
        CollectRnaSeqMetrics
        REF_FLAT=refFlat.tab

        ## The optional dataset stringifies to "None" when nothing was selected
        #if str( $ribosomal_intervals ) != "None":
            RIBOSOMAL_INTERVALS="${ribosomal_intervals}"
        #end if

        STRAND_SPECIFICITY="${strand_specificity}"
        MINIMUM_LENGTH="${minimum_length}"
        CHART_OUTPUT="${pdfFile}"

        #for $sequence_to_ignore in $ignore_list:
            IGNORE_SEQUENCE="${sequence_to_ignore.sequence}"
        #end for

        RRNA_FRAGMENT_PERCENTAGE="${rrna_fragment_percentage}"

        ## A multiple select is rendered as one comma-separated string, while Picard expects the
        ## option to be repeated once per level. Nothing is emitted when no level is selected.
        #if str( $metric_accumulation_level ) != "None":
            #for $level in str( $metric_accumulation_level ).split( "," ):
                METRIC_ACCUMULATION_LEVEL="${level}"
            #end for
        #end if

        INPUT="${inputFile}"
        OUTPUT="${outFile}"
        REFERENCE_SEQUENCE="${reference_fasta_filename}"
        ASSUME_SORTED="${assume_sorted}"

        QUIET=true
        VERBOSITY=ERROR
        VALIDATION_STRINGENCY="${validation_stringency}"
    ]]></command>

    <inputs>
        <param format="sam,bam" type="data" name="inputFile" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" />
        <!-- Both branches expose a parameter called ref_file; the command template decides how to use it -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Load reference genome from">
                <option value="cached">Local cache</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
                    <options from_data_table="all_fasta"></options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>
        <param format="tabular" name="refFlat" type="data" label="Gene annotations in refFlat form" help="See &quot;Obtaining gene annotations in refFlat format&quot; below for help" />
        <param name="ribosomal_intervals" format="picard_interval_list" type="data" optional="True" label="Location of rRNA sequences in genome, in interval_list format" help="RIBOSOMAL_INTERVALS; If not specified no bases will be identified as being ribosomal. The list of intervals can be generated from BED or Interval datasets using Galaxy BedToIntervalList tool"/>
        <param name="strand_specificity" type="select" label="What is the RNA-seq library strand specificity" help="STRAND_SPECIFICITY; For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand.">
            <option value="NONE" selected="True">None</option>
            <option value="FIRST_READ_TRANSCRIPTION_STRAND">First read transcription strand</option>
            <option value="SECOND_READ_TRANSCRIPTION_STRAND">Second read transcription strand</option>
        </param>
        <param name="minimum_length" type="integer" value="500" label="When calculating coverage based values use only transcripts of this length or greater" help="MINIMUM_LENGTH; default=500"/>
        <repeat name="ignore_list" title="Sequences to ignore" min="0" help="You can provide multiple sequences by clicking the button below">
            <param name="sequence" type="text" size="80" label="Ignore reads matching this sequence"/>
        </repeat>
        <param name="rrna_fragment_percentage" type="float" value="0.8" label="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair to be considered rRNA." help="RRNA_FRAGMENT_PERCENTAGE; default=0.8"/>
        <param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL">
            <option value="ALL_READS" selected="True">All reads</option>
            <option value="SAMPLE">Sample</option>
            <option value="LIBRARY">Library</option>
            <option value="READ_GROUP">Read group</option>
        </param>
        <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/>

        <expand macro="VS" />

    </inputs>
    <outputs>
        <data format="pdf" name="pdfFile" label="${tool.name} on ${on_string}: Chart PDF"/>
        <data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Summary stats"/>
    </outputs>

    <!-- Any non-zero exit code marks the job as failed -->
    <stdio>
        <exit_code range="1:" level="fatal"/>
    </stdio>
    <tests>
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" value="picard_CollectRnaSeqMetrics_ref.fa" ftype="fasta"/>
            <param name="inputFile" value="picard_CollectRnaSeqMetrics.bam" ftype="bam"/>
            <param name="assume_sorted" value="true" />
            <param name="refFlat" value="picard_CollectRnaSeqMetrics.refFlat" />
            <param name="metric_accumulation_level" value="ALL_READS" />
            <param name="minimum_length" value="500" />
            <param name="strand_specificity" value="NONE" />
            <param name="rrna_fragment_percentage" value="0.8" />
            <output name="outFile" file="picard_CollectRnaSeqMetrics_test1.tab" ftype="tabular" lines_diff="4"/>
        </test>

    </tests>
    <help>

.. class:: infomark

**Purpose**

Collects metrics about the alignment of RNA to various functional classes of loci in the genome: coding, intronic, UTR, intergenic, ribosomal.

@dataset_collections@

-----

.. class:: warningmark

**Obtaining gene annotations in refFlat format**

This tool requires gene annotations in refFlat_ format. These data can be obtained from UCSC table browser directly through Galaxy by following these steps:

1. Click on **Get Data** in the upper part of left pane of Galaxy interface
2. Click on **UCSC Main** link
3. Set your genome and dataset of interest. It **must** be the same genome build against which you have mapped the reads contained in the BAM file you are analyzing
4. In the **output format** field choose **selected fields from primary and related tables**
5. Click **get output** button
6. In the first table presented at the top of the page select (using checkboxes) first 11 fields:

   - name
   - chrom
   - strand
   - txStart
   - txEnd
   - cdsStart
   - cdsEnd
   - exonCount
   - exonStarts
   - exonEnds
   - proteinId

7. Click **done with selection**
8. Click **Send query to Galaxy**
9. A new dataset will appear in the current Galaxy history
10. Use this dataset as the input for **Gene annotations in refFlat form** dropdown of this tool

.. _refFlat: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat

@description@

  REF_FLAT=File                 Gene annotations in refFlat form. Format described here:
                                http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat Required.

  RIBOSOMAL_INTERVALS=File      Location of rRNA sequences in genome, in interval_list format. If not specified no bases
                                will be identified as being ribosomal. Format described here:
                                http://picard.sourceforge.net/javadoc/net/sf/picard/util/IntervalList.html and can be
                                generated from BED datasets using Galaxy's wrapper for picard_BedToIntervalList tool

  STRAND_SPECIFICITY=StrandSpecificity
  STRAND=StrandSpecificity      For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND
                                if the reads are expected to be on the transcription strand. Required. Possible values:
                                {NONE, FIRST_READ_TRANSCRIPTION_STRAND, SECOND_READ_TRANSCRIPTION_STRAND}

  MINIMUM_LENGTH=Integer        When calculating coverage based values (e.g. CV of coverage) only use transcripts of this
                                length or greater. Default value: 500.

  IGNORE_SEQUENCE=String        If a read maps to a sequence specified with this option, all the bases in the read are
                                counted as ignored bases.

  RRNA_FRAGMENT_PERCENTAGE=Double
                                This percentage of the length of a fragment must overlap one of the ribosomal intervals
                                for a read or read pair to be considered rRNA. Default value: 0.8.

  METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
  LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
                                LIBRARY, READ_GROUP} This option may be specified 0 or more times.

  ASSUME_SORTED=Boolean
  AS=Boolean                    If true (default), then the sort order in the header file will be ignored. Default
                                value: true. Possible values: {true, false}

@more_info@

    </help>
</tool>