annotate rgPicardHsMetrics.xml @ 1:e0232cbac965

Uploaded
author devteam
date Thu, 25 Oct 2012 12:51:42 -0400
parents 1cd7f3b42609
children 9227b8c3093b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
1 <tool name="SAM/BAM Hybrid Selection Metrics" id="PicardHsMetrics" version="1.56.0">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
2 <description>for targeted resequencing data</description>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
3 <command interpreter="python">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
4
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
5 picard_wrapper.py -i "$input_file" -d "$html_file.files_path" -t "$html_file" --datatype "$input_file.ext"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
6 --baitbed "$bait_bed" --targetbed "$target_bed" -n "$out_prefix" --tmpdir "${__new_file_path__}"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
7 -j "\$JAVA_JAR_PATH/CalculateHsMetrics.jar"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
8
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
9 </command>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
10 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
11 <inputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
12 <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
13 <param name="out_prefix" value="Picard HS Metrics" type="text" label="Title for the output file" help="Use to remind you what the job was for." size="80" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
14 <param name="bait_bed" type="data" format="bed,interval" label="Bait intervals: Sequences for bait in the design" help="Note specific format requirements below!" size="80" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
15 <param name="target_bed" type="data" format="bed,interval" label="Target intervals: Sequences for targets in the design" help="Note specific format requirements below!" size="80" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
16 <!--
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
17
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
18 Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
19 If commented out the heapsize defaults to the value specified within picard_wrapper.py
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
20
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
21 <param name="maxheap" type="select"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
22 help="If in doubt, try the default. If it fails with a complaint about java heap size, try increasing it please - larger jobs will require your own hardware."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
23 label="Java heap size">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
24 <option value="4G" selected = "true">4GB default </option>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
25 <option value="8G" >8GB use if 4GB fails</option>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
26 <option value="16G">16GB - try this if 8GB fails</option>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
27 </param>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
28
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
29 -->
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
30 </inputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
31 <outputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
32 <data format="html" name="html_file" label="${out_prefix}.html" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
33 </outputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
34 <tests>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
35 <test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
36 <!-- Uncomment this if maxheap parameter is enabled
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
37 <param name="maxheap" value="8G" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
38 -->
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
39 <param name="out_prefix" value="HSMetrics" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
40 <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
41 <param name="bait_bed" value="picard_input_bait.bed" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
42 <param name="target_bed" value="picard_input_bait.bed" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
43 <output name="html_file" file="picard_output_hs_transposed_summary_alignment_stats.html" ftype="html" lines_diff="212"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
44 </test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
45 </tests>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
46 <help>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
47
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
48 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
49
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
50 **Summary**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
51
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
52 Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
53
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
54 .. class:: warningmark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
55
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
56 **WARNING about bait and target files**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
57
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
58 Picard is very fussy about the bait and target file format. If these are not exactly right, it will fail with an error something like:
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
59
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
60 Exception in thread "main" net.sf.picard.PicardException: Invalid interval record contains 6 fields: chr1 45787123 45787316 CASO_22G_25063 1000 +
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
61
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
62 If you see an error like that from this tool, please do NOT report it to any of the Galaxy mailing lists as it is not a bug!
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
63 It means you must reformat your bait and target files. Galaxy cannot do that for you automatically unfortunately.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
64
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
65 The required definition is described in the documentation at http://www.broadinstitute.org/gsa/wiki/index.php/Built-in_command-line_arguments
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
66 and the sample provided looks like this:
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
67
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
68 chr1 1104841 1104940 + target_1
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
69 chr1 1105283 1105599 + target_2
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
70 chr1 1105712 1105860 + target_3
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
71 chr1 1105960 1106119 + target_4
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
72
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
73 So your bait and target files MUST have 5 columns with chr, start, end, strand and name tab delimited and in exactly that order.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
74 Note that the Picard mandated sam header described in the documentation linked above is automagically added by the tool in Galaxy.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
75
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
76 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
77
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
78 **Picard documentation**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
79
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
80 This is a Galaxy wrapper for CalculateHsMetrics.jar, a part of the external package Picard-tools_.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
81
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
82 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
83
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
84 -----
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
85
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
86 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
87
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
88 **Inputs, outputs, and parameters**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
89
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
90 Picard documentation says (reformatted for Galaxy):
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
91
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
92 Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
93
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
94 .. csv-table::
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
95 :header-rows: 1
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
96
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
97 "Option", "Description"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
98 "BAIT_INTERVALS=File","An interval list file that contains the locations of the baits used. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
99 "TARGET_INTERVALS=File","An interval list file that contains the locations of the targets. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
100 "INPUT=File","An aligned SAM or BAM file. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
101 "OUTPUT=File","The output file to write the metrics to. Required. Cannot be used in conjunction with option(s) METRICS_FILE (M)"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
102 "METRICS_FILE=File","Legacy synonym for OUTPUT, should not be used. Required. Cannot be used in conjunction with option(s) OUTPUT (O)"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
103 "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
104
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
105 HsMetrics
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
106
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
107 The set of metrics captured that are specific to a hybrid selection analysis.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
108
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
109 Output Column Definitions::
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
110
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
111 1. BAIT_SET: The name of the bait set used in the hybrid selection.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
112 2. GENOME_SIZE: The number of bases in the reference genome used for alignment.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
113 3. BAIT_TERRITORY: The number of bases which have one or more baits on top of them.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
114 4. TARGET_TERRITORY: The unique number of target bases in the experiment where target is usually exons etc.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
115 5. BAIT_DESIGN_EFFICIENCY: Target territory / bait territory. 1 == perfectly efficient, 0.5 = half of baited bases are not target.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
116 6. TOTAL_READS: The total number of reads in the SAM or BAM file examined.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
117 7. PF_READS: The number of reads that pass the vendor's filter.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
118 8. PF_UNIQUE_READS: The number of PF reads that are not marked as duplicates.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
119 9. PCT_PF_READS: PF reads / total reads. The percent of reads passing filter.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
120 10. PCT_PF_UQ_READS: PF Unique Reads / Total Reads.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
121 11. PF_UQ_READS_ALIGNED: The number of PF unique reads that are aligned with mapping score > 0 to the reference genome.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
122 12. PCT_PF_UQ_READS_ALIGNED: PF Reads Aligned / PF Reads.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
123 13. PF_UQ_BASES_ALIGNED: The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
124 14. ON_BAIT_BASES: The number of PF aligned bases that mapped to a baited region of the genome.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
125 15. NEAR_BAIT_BASES: The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
126 16. OFF_BAIT_BASES: The number of PF aligned bases that mapped neither on nor near a bait.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
127 17. ON_TARGET_BASES: The number of PF aligned bases that mapped to a targeted region of the genome.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
128 18. PCT_SELECTED_BASES: On+Near Bait Bases / PF Bases Aligned.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
129 19. PCT_OFF_BAIT: The percentage of aligned PF bases that mapped neither on nor near a bait.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
130 20. ON_BAIT_VS_SELECTED: The percentage of on+near bait bases that are on as opposed to near.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
131 21. MEAN_BAIT_COVERAGE: The mean coverage of all baits in the experiment.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
132 22. MEAN_TARGET_COVERAGE: The mean coverage of targets that received at least coverage depth = 2 at one base.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
133 23. PCT_USABLE_BASES_ON_BAIT: The number of aligned, de-duped, on-bait bases out of the PF bases available.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
134 24. PCT_USABLE_BASES_ON_TARGET: The number of aligned, de-duped, on-target bases out of the PF bases available.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
135 25. FOLD_ENRICHMENT: The fold by which the baited region has been amplified above genomic background.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
136 26. ZERO_CVG_TARGETS_PCT: The number of targets that did not reach coverage=2 over any base.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
137 27. FOLD_80_BASE_PENALTY: The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to the mean coverage level in those targets.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
138 28. PCT_TARGET_BASES_2X: The percentage of ALL target bases achieving 2X or greater coverage.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
139 29. PCT_TARGET_BASES_10X: The percentage of ALL target bases achieving 10X or greater coverage.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
140 30. PCT_TARGET_BASES_20X: The percentage of ALL target bases achieving 20X or greater coverage.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
141 31. PCT_TARGET_BASES_30X: The percentage of ALL target bases achieving 30X or greater coverage.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
142 32. HS_LIBRARY_SIZE: The estimated number of unique molecules in the selected part of the library.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
143 33. HS_PENALTY_10X: The "hybrid selection penalty" incurred to get 80% of target bases to 10X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 10X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 10 * HS_PENALTY_10X.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
144 34. HS_PENALTY_20X: The "hybrid selection penalty" incurred to get 80% of target bases to 20X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 20X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 20 * HS_PENALTY_20X.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
145 35. HS_PENALTY_30X: The "hybrid selection penalty" incurred to get 80% of target bases to 30X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 30X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 30 * HS_PENALTY_30X.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
146
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
147 .. class:: warningmark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
148
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
149 **Warning on SAM/BAM quality**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
150
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
151 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
152 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears to be the only way to deal with SAM/BAM that cannot be parsed.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
153
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
154
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
155 </help>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
156 </tool>