Mercurial > repos > devteam > picard
annotate picard_CollectRnaSeqMetrics.xml @ 12:05087b27692a draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
author | devteam |
---|---|
date | Sun, 27 Nov 2016 15:11:50 -0500 |
parents | 5eaa8a968300 |
children | 7e6fd3d0f16e |
rev | line source |
---|---|
8
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
1 <tool name="CollectRnaSeqMetrics" id="picard_CollectRnaSeqMetrics" version="@TOOL_VERSION@.0"> |
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
2 <description> collect metrics about the alignment of RNA to various functional classes of loci in the genome</description> |
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
3 <macros> |
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
4 <import>picard_macros.xml</import> |
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
5 </macros> |
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
6 <expand macro="requirements"> |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
7 <requirement type="package" version="3.3.1">r</requirement> |
8
3a3234d7a2e8
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
8 </expand> |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
9 <command detect_errors="exit_code"><![CDATA[ |
5 | 10 |
11 ## Set up input files | |
12 | |
13 ## Reference sequences | |
14 | |
15 #set $reference_fasta_filename = "localref.fa" | |
16 | |
17 #if str( $reference_source.reference_source_selector ) == "history": | |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
18 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && |
5 | 19 #else: |
20 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
21 #end if | |
22 | |
23 ## refFlat data | |
24 ## The awk line below converts a file obtained from UCSC as specified in the tool help to refFlat format | |
25 | |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
26 grep -v '^#' "${refFlat}" | awk '{print $11"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10}' > refFlat.tab && |
5 | 27 |
28 ## Start picard command | |
29 | |
30 @java_options@ | |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
31 picard |
5 | 32 CollectRnaSeqMetrics |
33 REF_FLAT=refFlat.tab | |
34 | |
35 #if str( $ribosomal_intervals ) != "None": | |
36 RIBOSOMAL_INTERVALS="${ribosomal_intervals}" | |
37 #end if | |
38 | |
39 STRAND_SPECIFICITY="${strand_specificity}" | |
40 MINIMUM_LENGTH="${minimum_length}" | |
41 CHART_OUTPUT="${pdfFile}" | |
42 | |
43 #for $sequence_to_ignore in $ignore_list: | |
44 IGNORE_SEQUENCE="${sequence_to_ignore.sequence}" | |
45 #end for | |
46 | |
47 RRNA_FRAGMENT_PERCENTAGE="${rrna_fragment_percentage}" | |
48 METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}" | |
49 INPUT="${inputFile}" | |
50 OUTPUT="${outFile}" | |
51 REFERENCE_SEQUENCE="${reference_fasta_filename}" | |
52 ASSUME_SORTED="${assume_sorted}" | |
53 | |
54 QUIET=true | |
55 VERBOSITY=ERROR | |
56 VALIDATION_STRINGENCY=${validation_stringency} | |
57 | |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
58 ]]></command> |
5 | 59 |
60 <inputs> | |
61 <param format="sam,bam" type="data" name="inputFile" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" /> | |
62 <conditional name="reference_source"> | |
63 <param name="reference_source_selector" type="select" label="Load reference genome from"> | |
64 <option value="cached">Local cache</option> | |
65 <option value="history">History</option> | |
66 </param> | |
67 <when value="cached"> | |
68 <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE"> | |
69 <options from_data_table="all_fasta"></options> | |
70 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
71 </param> | |
72 </when> | |
73 <when value="history"> | |
74 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" /> | |
75 </when> | |
76 </conditional> | |
77 <param format="tabular" name="refFlat" type="data" label="Gene annotations in refFlat form" help="See &quot;Obtaining gene annotations in refFlat format&quot; below for help" /> | |
78 <param name="ribosomal_intervals" format="picard_interval_list" type="data" optional="True" label="Location of rRNA sequences in genome, in interval_list format" help="RIBOSOMAL_INTERVALS; If not specified no bases will be identified as being ribosomal. The list of intervals can be generated from BED or Interval datasets using Galaxy BedToIntervalList tool"/> | |
79 <param name="strand_specificity" type="select" label="What is the RNA-seq library strand specificity" help="STRAND_SPECIFICITY; For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand."> | |
12
05087b27692a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 7491208ca0c917a053798a48c3e54c3e30e95d92
devteam
parents:
9
diff
changeset
|
80 <option value="NONE" selected="True">None</option> |
5 | 81 <option value="FIRST_READ_TRANSCRIPTION_STRAND">First read transcription strand</option> |
82 <option value="SECOND_READ_TRANSCRIPTION_STRAND">Second read transcription strand</option> | |
83 </param> | |
84 <param name="minimum_length" type="integer" value="500" label="When calculating coverage based values only use transcripts of this length or greater" help="MINIMUM_LENGTH; default=500"/> | |
85 <repeat name="ignore_list" title="Sequences to ignore" min="0" help="You can provide multiple sequences by clicking the button below"> | |
9
5eaa8a968300
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit 1869970193a1878acbc0f8a79b81dd02b37f1dc1
devteam
parents:
8
diff
changeset
|
86 <param name="sequence" type="text" label="Ignore reads matching this sequence"/> |
5 | 87 </repeat> |
88 <param name="rrna_fragment_percentage" type="float" value="0.8" label="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair to be considered rRNA." help="RRNA_FRAGMENT_PERCENTAGE; default=0.8"/> | |
89 <param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL"> | |
90 <option value="ALL_READS" selected="True">All reads</option> | |
91 <option value="SAMPLE">Sample</option> | |
92 <option value="LIBRARY">Library</option> | |
93 <option value="READ_GROUP">Read group</option> | |
94 </param> | |
95 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/> | |
96 | |
97 <expand macro="VS" /> | |
98 | |
99 </inputs> | |
100 <outputs> | |
101 <data format="pdf" name="pdfFile" label="${tool.name} on ${on_string}: Chart PDF"/> | |
102 <data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Summary stats"/> | |
103 </outputs> | |
104 | |
105 <tests> | |
106 <test> | |
107 <param name="reference_source_selector" value="history"/> | |
108 <param name="ref_file" value="picard_CollectRnaSeqMetrics_ref.fa" ftype="fasta"/> | |
109 <param name="inputFile" value="picard_CollectRnaSeqMetrics.bam" ftype="bam"/> | |
110 <param name="assume_sorted" value="true" /> | |
111 <param name="refFlat" value="picard_CollectRnaSeqMetrics.refFlat" /> | |
112 <param name="metric_accumulation_level" value="ALL_READS" /> | |
113 <param name="minimum_length" value="500" /> | |
114 <param name="strand_specificity" value="NONE" /> | |
115 <param name="rrna_fragment_percentage" value="0.8" /> | |
116 <output name="outFile" file="picard_CollectRnaSeqMetrics_test1.tab" ftype="tabular" lines_diff="4"/> | |
117 </test> | |
118 | |
119 </tests> | |
120 <help> | |
121 | |
122 .. class:: infomark | |
123 | |
124 **Purpose** | |
125 | |
126 Collects metrics about the alignment of RNA to various functional classes of loci in the genome: coding, intronic, UTR, intergenic, ribosomal. | |
127 | |
128 @dataset_collections@ | |
129 | |
130 ----- | |
131 | |
132 .. class:: warningmark | |
133 | |
134 **Obtaining gene annotations in refFlat format** | |
135 | |
136 This tool requires gene annotations in refFlat_ format. These data can be obtained from UCSC table browser directly through Galaxy by following these steps: | |
137 | |
138 1. Click on **Get Data** in the upper part of left pane of Galaxy interface | |
139 2. Click on **UCSC Main** link | |
140 3. Set your genome and dataset of interest. It **must** be the same genome build against which you have mapped the reads contained in the BAM file you are analyzing | |
141 4. In the **output format** field choose **selected fields from primary and related tables** | |
142 5. Click **get output** button | |
143 6. In the first table presented at the top of the page select (using checkboxes) first 11 fields: | |
144 name | |
145 chrom | |
146 strand | |
147 txStart | |
148 txEnd | |
149 cdsStart | |
150 cdsEnd | |
151 exonCount | |
152 exonStarts | |
153 exonEnds | |
154 proteinId | |
155 7. Click **done with selection** | |
156 8. Click **Send query to Galaxy** | |
157 9. A new dataset will appear in the current Galaxy history | |
158 10. Use this dataset as the input for **Gene annotations in refFlat form** dropdown of this tool | |
159 | |
160 .. _refFlat: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat | |
161 | |
162 @description@ | |
163 | |
164 REF_FLAT=File Gene annotations in refFlat form. Format described here: | |
165 http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat Required. | |
166 | |
167 RIBOSOMAL_INTERVALS=File Location of rRNA sequences in genome, in interval_list format. If not specified no bases | |
168 will be identified as being ribosomal. Format described here: | |
169 http://picard.sourceforge.net/javadoc/net/sf/picard/util/IntervalList.html and can be | |
170 generated from BED datasets using Galaxy's wrapper for picard_BedToIntervalList tool | |
171 | |
172 STRAND_SPECIFICITY=StrandSpecificity | |
173 STRAND=StrandSpecificity For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND | |
174 if the reads are expected to be on the transcription strand. Required. Possible values: | |
175 {NONE, FIRST_READ_TRANSCRIPTION_STRAND, SECOND_READ_TRANSCRIPTION_STRAND} | |
176 | |
177 MINIMUM_LENGTH=Integer When calculating coverage based values (e.g. CV of coverage) only use transcripts of this | |
178 length or greater. Default value: 500. | |
179 | |
180 IGNORE_SEQUENCE=String If a read maps to a sequence specified with this option, all the bases in the read are | |
181 counted as ignored bases. | |
182 | |
183 RRNA_FRAGMENT_PERCENTAGE=Double | |
184 This percentage of the length of a fragment must overlap one of the ribosomal intervals | |
185 for a read or read pair to be considered rRNA. Default value: 0.8. | |
186 | |
187 METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel | |
188 LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE, | |
189 LIBRARY, READ_GROUP} This option may be specified 0 or more times. | |
190 | |
191 ASSUME_SORTED=Boolean | |
192 AS=Boolean If true (default), then the sort order in the header file will be ignored. Default | |
193 value: true. Possible values: {true, false} | |
194 | |
195 @more_info@ | |
196 | |
197 </help> | |
198 </tool> |