annotate rgPicardHsMetrics.xml @ 4:f4d018471628 draft default tip

Uploaded
author jpruab
date Tue, 13 Aug 2013 12:09:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
f4d018471628 Uploaded
jpruab
parents:
diff changeset
1 <tool name="SAM/BAM Hybrid Selection Metrics" id="PicardHsMetrics" version="1.56.0">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
2 <description>for targeted resequencing data</description>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
3 <command interpreter="python">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
4
f4d018471628 Uploaded
jpruab
parents:
diff changeset
5 picard_wrapper.py -i "${input_file}" -d "${html_file.files_path}" -t "${html_file}" --datatype "${input_file.ext}"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
6 --baitbed "${bait_bed}" --targetbed "${target_bed}" -n "${out_prefix}" --tmpdir "${__new_file_path__}"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
7 -j "\$JAVA_JAR_PATH/CalculateHsMetrics.jar"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
8
f4d018471628 Uploaded
jpruab
parents:
diff changeset
9 </command>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
10 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
11 <inputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
12 <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
13 <param name="out_prefix" value="Picard HS Metrics" type="text" label="Title for the output file" help="Use to remind you what the job was for." size="80" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
14 <param name="bait_bed" type="data" format="bed,interval" label="Bait intervals: Sequences for bait in the design" help="Note specific format requirements below!" size="80" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
15 <param name="target_bed" type="data" format="bed,interval" label="Target intervals: Sequences for targets in the design" help="Note specific format requirements below!" size="80" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
16 <!--
f4d018471628 Uploaded
jpruab
parents:
diff changeset
17
f4d018471628 Uploaded
jpruab
parents:
diff changeset
18 Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
19 If commented out the heapsize defaults to the value specified within picard_wrapper.py
f4d018471628 Uploaded
jpruab
parents:
diff changeset
20
f4d018471628 Uploaded
jpruab
parents:
diff changeset
21 <param name="maxheap" type="select"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
22 help="If in doubt, try the default. If it fails with a complaint about java heap size, try increasing it please - larger jobs will require your own hardware."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
23 label="Java heap size">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
24 <option value="4G" selected = "true">4GB default </option>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
25 <option value="8G" >8GB use if 4GB fails</option>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
26 <option value="16G">16GB - try this if 8GB fails</option>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
27 </param>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
28
f4d018471628 Uploaded
jpruab
parents:
diff changeset
29 -->
f4d018471628 Uploaded
jpruab
parents:
diff changeset
30 </inputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
31 <outputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
32 <data format="html" name="html_file" label="${out_prefix}.html" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
33 </outputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
34 <tests>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
35 <test>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
36 <!-- Uncomment this if maxheap parameter is enabled
f4d018471628 Uploaded
jpruab
parents:
diff changeset
37 <param name="maxheap" value="8G" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
38 -->
f4d018471628 Uploaded
jpruab
parents:
diff changeset
39 <param name="out_prefix" value="HSMetrics" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
40 <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
41 <param name="bait_bed" value="picard_input_bait.bed" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
42 <param name="target_bed" value="picard_input_bait.bed" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
43 <output name="html_file" file="picard_output_hs_transposed_summary_alignment_stats.html" ftype="html" lines_diff="212"/>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
44 </test>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
45 </tests>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
46 <help>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
47
f4d018471628 Uploaded
jpruab
parents:
diff changeset
48 .. class:: infomark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
49
f4d018471628 Uploaded
jpruab
parents:
diff changeset
50 **Summary**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
51
f4d018471628 Uploaded
jpruab
parents:
diff changeset
52 Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
53
f4d018471628 Uploaded
jpruab
parents:
diff changeset
54 .. class:: warningmark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
55
f4d018471628 Uploaded
jpruab
parents:
diff changeset
56 **WARNING about bait and target files**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
57
f4d018471628 Uploaded
jpruab
parents:
diff changeset
58 Picard is very fussy about the bait and target file format. If these are not exactly right, it will fail with an error something like:
f4d018471628 Uploaded
jpruab
parents:
diff changeset
59
f4d018471628 Uploaded
jpruab
parents:
diff changeset
60 Exception in thread "main" net.sf.picard.PicardException: Invalid interval record contains 6 fields: chr1 45787123 45787316 CASO_22G_25063 1000 +
f4d018471628 Uploaded
jpruab
parents:
diff changeset
61
f4d018471628 Uploaded
jpruab
parents:
diff changeset
62 If you see an error like that from this tool, please do NOT report it to any of the Galaxy mailing lists as it is not a bug!
f4d018471628 Uploaded
jpruab
parents:
diff changeset
63 It means you must reformat your bait and target files. Galaxy cannot do that for you automatically unfortunately.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
64
f4d018471628 Uploaded
jpruab
parents:
diff changeset
65 The required definition is described in the documentation at http://www.broadinstitute.org/gsa/wiki/index.php/Built-in_command-line_arguments
f4d018471628 Uploaded
jpruab
parents:
diff changeset
66 and the sample provided looks like this:
f4d018471628 Uploaded
jpruab
parents:
diff changeset
67
f4d018471628 Uploaded
jpruab
parents:
diff changeset
68 chr1 1104841 1104940 + target_1
f4d018471628 Uploaded
jpruab
parents:
diff changeset
69 chr1 1105283 1105599 + target_2
f4d018471628 Uploaded
jpruab
parents:
diff changeset
70 chr1 1105712 1105860 + target_3
f4d018471628 Uploaded
jpruab
parents:
diff changeset
71 chr1 1105960 1106119 + target_4
f4d018471628 Uploaded
jpruab
parents:
diff changeset
72
f4d018471628 Uploaded
jpruab
parents:
diff changeset
73 So your bait and target files MUST have 5 columns with chr, start, end, strand and name tab delimited and in exactly that order.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
74 Note that the Picard mandated sam header described in the documentation linked above is automagically added by the tool in Galaxy.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
75
f4d018471628 Uploaded
jpruab
parents:
diff changeset
76 .. class:: infomark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
77
f4d018471628 Uploaded
jpruab
parents:
diff changeset
78 **Picard documentation**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
79
f4d018471628 Uploaded
jpruab
parents:
diff changeset
80 This is a Galaxy wrapper for CalculateHsMetrics.jar, a part of the external package Picard-tools_.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
81
f4d018471628 Uploaded
jpruab
parents:
diff changeset
82 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
f4d018471628 Uploaded
jpruab
parents:
diff changeset
83
f4d018471628 Uploaded
jpruab
parents:
diff changeset
84 -----
f4d018471628 Uploaded
jpruab
parents:
diff changeset
85
f4d018471628 Uploaded
jpruab
parents:
diff changeset
86 .. class:: infomark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
87
f4d018471628 Uploaded
jpruab
parents:
diff changeset
88 **Inputs, outputs, and parameters**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
89
f4d018471628 Uploaded
jpruab
parents:
diff changeset
90 Picard documentation says (reformatted for Galaxy):
f4d018471628 Uploaded
jpruab
parents:
diff changeset
91
f4d018471628 Uploaded
jpruab
parents:
diff changeset
92 Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
93
f4d018471628 Uploaded
jpruab
parents:
diff changeset
94 .. csv-table::
f4d018471628 Uploaded
jpruab
parents:
diff changeset
95 :header-rows: 1
f4d018471628 Uploaded
jpruab
parents:
diff changeset
96
f4d018471628 Uploaded
jpruab
parents:
diff changeset
97 "Option", "Description"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
98 "BAIT_INTERVALS=File","An interval list file that contains the locations of the baits used. Required."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
99 "TARGET_INTERVALS=File","An interval list file that contains the locations of the targets. Required."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
100 "INPUT=File","An aligned SAM or BAM file. Required."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
101 "OUTPUT=File","The output file to write the metrics to. Required. Cannot be used in conjunction with option(s) METRICS_FILE (M)"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
102 "METRICS_FILE=File","Legacy synonym for OUTPUT, should not be used. Required. Cannot be used in conjunction with option(s) OUTPUT (O)"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
103 "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
104
f4d018471628 Uploaded
jpruab
parents:
diff changeset
105 HsMetrics
f4d018471628 Uploaded
jpruab
parents:
diff changeset
106
f4d018471628 Uploaded
jpruab
parents:
diff changeset
107 The set of metrics captured that are specific to a hybrid selection analysis.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
108
f4d018471628 Uploaded
jpruab
parents:
diff changeset
109 Output Column Definitions::
f4d018471628 Uploaded
jpruab
parents:
diff changeset
110
f4d018471628 Uploaded
jpruab
parents:
diff changeset
111 1. BAIT_SET: The name of the bait set used in the hybrid selection.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
112 2. GENOME_SIZE: The number of bases in the reference genome used for alignment.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
113 3. BAIT_TERRITORY: The number of bases which have one or more baits on top of them.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
114 4. TARGET_TERRITORY: The unique number of target bases in the experiment where target is usually exons etc.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
115 5. BAIT_DESIGN_EFFICIENCY: Target territory / bait territory. 1 = perfectly efficient, 0.5 = half of baited bases are not target.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
116 6. TOTAL_READS: The total number of reads in the SAM or BAM file examined.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
117 7. PF_READS: The number of reads that pass the vendor's filter.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
118 8. PF_UNIQUE_READS: The number of PF reads that are not marked as duplicates.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
119 9. PCT_PF_READS: PF reads / total reads. The percent of reads passing filter.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
120 10. PCT_PF_UQ_READS: PF Unique Reads / Total Reads.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
121 11. PF_UQ_READS_ALIGNED: The number of PF unique reads that are aligned with mapping score > 0 to the reference genome.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
122 12. PCT_PF_UQ_READS_ALIGNED: PF Reads Aligned / PF Reads.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
123 13. PF_UQ_BASES_ALIGNED: The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
124 14. ON_BAIT_BASES: The number of PF aligned bases that mapped to a baited region of the genome.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
125 15. NEAR_BAIT_BASES: The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
126 16. OFF_BAIT_BASES: The number of PF aligned bases that mapped neither on nor near a bait.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
127 17. ON_TARGET_BASES: The number of PF aligned bases that mapped to a targeted region of the genome.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
128 18. PCT_SELECTED_BASES: On+Near Bait Bases / PF Bases Aligned.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
129 19. PCT_OFF_BAIT: The percentage of aligned PF bases that mapped neither on nor near a bait.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
130 20. ON_BAIT_VS_SELECTED: The percentage of on+near bait bases that are on as opposed to near.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
131 21. MEAN_BAIT_COVERAGE: The mean coverage of all baits in the experiment.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
132 22. MEAN_TARGET_COVERAGE: The mean coverage of targets that received at least coverage depth = 2 at one base.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
133 23. PCT_USABLE_BASES_ON_BAIT: The number of aligned, de-duped, on-bait bases out of the PF bases available.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
134 24. PCT_USABLE_BASES_ON_TARGET: The number of aligned, de-duped, on-target bases out of the PF bases available.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
135 25. FOLD_ENRICHMENT: The fold by which the baited region has been amplified above genomic background.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
136 26. ZERO_CVG_TARGETS_PCT: The number of targets that did not reach coverage=2 over any base.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
137 27. FOLD_80_BASE_PENALTY: The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to the mean coverage level in those targets.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
138 28. PCT_TARGET_BASES_2X: The percentage of ALL target bases achieving 2X or greater coverage.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
139 29. PCT_TARGET_BASES_10X: The percentage of ALL target bases achieving 10X or greater coverage.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
140 30. PCT_TARGET_BASES_20X: The percentage of ALL target bases achieving 20X or greater coverage.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
141 31. PCT_TARGET_BASES_30X: The percentage of ALL target bases achieving 30X or greater coverage.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
142 32. HS_LIBRARY_SIZE: The estimated number of unique molecules in the selected part of the library.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
143 33. HS_PENALTY_10X: The "hybrid selection penalty" incurred to get 80% of target bases to 10X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 10X coverage I need to sequence until PF_ALIGNED_BASES = 10^7 * 10 * HS_PENALTY_10X.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
144 34. HS_PENALTY_20X: The "hybrid selection penalty" incurred to get 80% of target bases to 20X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 20X coverage I need to sequence until PF_ALIGNED_BASES = 10^7 * 20 * HS_PENALTY_20X.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
145 35. HS_PENALTY_30X: The "hybrid selection penalty" incurred to get 80% of target bases to 30X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 30X coverage I need to sequence until PF_ALIGNED_BASES = 10^7 * 30 * HS_PENALTY_30X.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
146
f4d018471628 Uploaded
jpruab
parents:
diff changeset
147 .. class:: warningmark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
148
f4d018471628 Uploaded
jpruab
parents:
diff changeset
149 **Warning on SAM/BAM quality**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
150
f4d018471628 Uploaded
jpruab
parents:
diff changeset
151 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
152 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears to be the only way to deal with SAM/BAM that cannot be parsed.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
153
f4d018471628 Uploaded
jpruab
parents:
diff changeset
154
f4d018471628 Uploaded
jpruab
parents:
diff changeset
155 </help>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
156 </tool>