annotate rgPicardASMetrics.xml @ 0:1cd7f3b42609

Uploaded tool.
author devteam
date Tue, 23 Oct 2012 13:14:29 -0400
parents
children 9227b8c3093b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
1 <tool name="SAM/BAM Alignment Summary Metrics" id="PicardASMetrics" version="1.56.0">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
2 <command interpreter="python">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
3 picard_wrapper.py -i "$input_file" -d "$html_file.files_path" -t "$html_file"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
4 --assumesorted "$sorted" -b "$bisulphite" --adaptors "$adaptors" --maxinsert "$maxinsert" -n "$out_prefix" --datatype "$input_file.ext"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
5 -j \$JAVA_JAR_PATH/CollectAlignmentSummaryMetrics.jar --tmpdir "${__new_file_path__}"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
6 #if $genomeSource.refGenomeSource == "history":
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
7 --ref-file "$genomeSource.ownFile"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
8 #else
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
9 --ref "${genomeSource.index.fields.path}"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
10 #end if
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
11 </command>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
12 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
13 <inputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
14 <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
15 help="If empty, upload or import a SAM/BAM dataset."/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
16 <param name="out_prefix" value="Picard Alignment Summary Metrics" type="text"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
17 label="Title for the output file" help="Use this to remind you what the job was for." size="80" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
18
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
19 <conditional name="genomeSource">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
20
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
21 <param name="refGenomeSource" type="select" label="Select Reference Genome">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
22 <option value="default" selected="true">Use the assigned data genome/build</option>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
23 <option value="indexed">Select a different built-in genome</option>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
24 <option value="history">Use a genome (fasta format) from my history</option>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
25 </param>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
26 <when value="default">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
27 <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in your dataset were aligned against this reference. If this is not correct, use the 'Select a different built-in genome' option of the 'Select Reference Genome' dropdown to select the appropriate reference.">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
28 <options from_data_table="all_fasta">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
29 <filter type="data_meta" ref="input_file" key="dbkey" column="dbkey" multiple="True" separator="," />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
30 <validator type="no_options" message="No reference build available for selected input" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
31 </options>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
32 </param>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
33 </when>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
34 <when value="indexed">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
35 <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here, request it by using the 'Help' link at the top of the Galaxy interface, or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown.">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
36 <options from_data_table="all_fasta">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
37 </options>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
38 </param>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
39 </when>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
40 <when value="history">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
41 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. If you are working with large human-sized genomes, send a request to the Galaxy team to add your reference to this Galaxy instance by using the 'Help' link at the top of the Galaxy interface."/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
42 </when>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
43 </conditional>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
44 <param name="sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
45 <param name="bisulphite" type="boolean" label="Input file contains Bisulphite sequenced reads" checked="false" falsevalue="false" truevalue="true" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
46 <param name="adaptors" value="" type="text" area="true" label="Adapter sequences" help="One per line if multiple" size="5x120" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
47 <param name="maxinsert" value="100000" type="integer" label="Maximum insert size (paired end reads with a larger insert size and inter-chromosomal pairs are considered chimeric)" size="20" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
48 </inputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
49 <outputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
50 <data format="html" name="html_file" label="${out_prefix}.html" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
51 </outputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
52 <tests>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
53 <test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
54 <param name="out_prefix" value="AsMetrics" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
55 <param name="bisulphite" value="false" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
56 <param name="sorted" value="true" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
57 <param name="adaptors" value="" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
58 <param name="maxinsert" value="100000" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
59 <param name="refGenomeSource" value="history" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
60 <param name="ownFile" value="picard_input_hg18.trimmed.fasta" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
61 <param name="input_file" value="picard_input_tiny.sam" dbkey="hg18" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
62 <output name="html_file" file="picard_output_alignment_summary_metrics.html" ftype="html" lines_diff="55"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
63 </test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
64 <test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
65 <param name="out_prefix" value="AsMetricsIndexed" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
66 <param name="bisulphite" value="false" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
67 <param name="sorted" value="true" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
68 <param name="adaptors" value="" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
69 <param name="maxinsert" value="100000" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
70 <param name="refGenomeSource" value="indexed" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
71 <param name="index" value="hg19" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
72 <param name="input_file" value="picard_input_sorted_pair.sam" dbkey="hg19" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
73 <output name="html_file" file="picard_output_AsMetrics_indexed_hg18_sorted_pair.html" ftype="html" lines_diff="50"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
74 </test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
75 </tests>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
76 <help>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
77
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
78 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
79
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
80 **Summary**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
81
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
82 This Galaxy tool uses Picard to report high-level measures of alignment based on a provided SAM or BAM file.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
83
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
84 **Picard documentation**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
85
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
86 This is a Galaxy wrapper for CollectAlignmentSummaryMetrics, a part of the external package Picard-tools_.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
87
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
88 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
89
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
90 -----
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
91
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
92 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
93
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
94 **Syntax**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
95
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
96 - **Input** - SAM/BAM format aligned short read data in your current history
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
97 - **Title** - the title to use for all output files from this job - use it for high level metadata
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
98 - **Reference Genome** - Galaxy (and Picard) needs to know which genomic reference was used to generate alignments within the input SAM/BAM dataset. Here you have three choices:
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
99
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
100 - *Assigned data genome/build* - a genome specified for this dataset. If your SAM/BAM dataset has an assigned reference genome, it will be displayed below this dropdown. If it does not, use one of the following two options.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
101 - *Select a different built-in genome* - this option will list all reference genomes presently cached at this instance of Galaxy.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
102 - *Select a reference genome from history* - alternatively you can upload your own version of the reference genome into your history and use it with this option. This is, however, not advisable with large human-sized genomes. If your genome is large, contact the Galaxy team using the "Help" link at the top of the interface and provide exact details on where we can download the sequences you would like to use as the reference. We will then install them as a part of the locally cached genomic references.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
103
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
104 - **Assume Sorted** - saves sorting time - but only if true!
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
105 - **Bisulphite data** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectAlignmentSummaryMetrics
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
106 - **Maximum acceptable insert size** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectAlignmentSummaryMetrics
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
107
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
108 -----
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
109
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
110 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
111
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
112 **Inputs, outputs, and parameters**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
113
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
114 The Picard documentation (reformatted for Galaxy) says:
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
115
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
116 .. csv-table::
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
117 :header-rows: 1
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
118
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
119 Option,Description
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
120 "INPUT=File","SAM or BAM file. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
121 "OUTPUT=File","File to write alignment summary metrics to. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
122 "REFERENCE_SEQUENCE=File","Reference sequence file. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
123 "ASSUME_SORTED=Boolean","If true (default), unsorted SAM/BAM files will be considered coordinate sorted "
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
124 "MAX_INSERT_SIZE=Integer","Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
125 "ADAPTER_SEQUENCE=String","This option may be specified 0 or more times. "
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
126 "IS_BISULFITE_SEQUENCED=Boolean","Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. "
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
127 "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
128
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
129 The output produced by the tool has the following columns::
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
130
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
131 1. CATEGORY: One of either UNPAIRED (for a fragment run), FIRST_OF_PAIR when metrics are for only the first read in a paired run, SECOND_OF_PAIR when the metrics are for only the second read in a paired run or PAIR when the metrics are aggregated for both first and second reads in a pair.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
132 2. TOTAL_READS: The total number of reads including all PF and non-PF reads. When CATEGORY equals PAIR this value will be 2x the number of clusters.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
133 3. PF_READS: The number of PF reads where PF is defined as passing Illumina's filter.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
134 4. PCT_PF_READS: The percentage of reads that are PF (PF_READS / TOTAL_READS)
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
135 5. PF_NOISE_READS: The number of PF reads that are marked as noise reads. A noise read is one which is composed entirely of A bases and/or N bases. These reads are marked as they are usually artifactual and are of no use in downstream analysis.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
136 6. PF_READS_ALIGNED: The number of PF reads that were aligned to the reference sequence. This includes reads that aligned with low quality (i.e. their alignments are ambiguous).
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
137 7. PCT_PF_READS_ALIGNED: The percentage of PF reads that aligned to the reference sequence. PF_READS_ALIGNED / PF_READS
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
138 8. PF_HQ_ALIGNED_READS: The number of PF reads that were aligned to the reference sequence with a mapping quality of Q20 or higher signifying that the aligner estimates a 1/100 (or smaller) chance that the alignment is wrong.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
139 9. PF_HQ_ALIGNED_BASES: The number of bases aligned to the reference sequence in reads that were mapped at high quality. Will usually approximate PF_HQ_ALIGNED_READS * READ_LENGTH but may differ when either mixed read lengths are present or many reads are aligned with gaps.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
140 10. PF_HQ_ALIGNED_Q20_BASES: The subset of PF_HQ_ALIGNED_BASES where the base call quality was Q20 or higher.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
141 11. PF_HQ_MEDIAN_MISMATCHES: The median number of mismatches versus the reference sequence in reads that were aligned to the reference at high quality (i.e. PF_HQ_ALIGNED_READS).
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
142 12. PF_HQ_ERROR_RATE: The percentage of bases that mismatch the reference in PF HQ aligned reads.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
143 13. MEAN_READ_LENGTH: The mean read length of the set of reads examined. When looking at the data for a single lane with equal length reads this number is just the read length. When looking at data for merged lanes with differing read lengths this is the mean read length of all reads.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
144 14. READS_ALIGNED_IN_PAIRS: The number of aligned reads whose mate pair was also aligned to the reference.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
145 15. PCT_READS_ALIGNED_IN_PAIRS: The percentage of reads whose mate pair was also aligned to the reference. READS_ALIGNED_IN_PAIRS / PF_READS_ALIGNED
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
146 16. BAD_CYCLES: The number of instrument cycles in which 80% or more of base calls were no-calls.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
147 17. STRAND_BALANCE: The number of PF reads aligned to the positive strand of the genome divided by the number of PF reads aligned to the genome.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
148 18. PCT_CHIMERAS: The percentage of reads that map outside of a maximum insert size (usually 100kb) or that have the two ends mapping to different chromosomes.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
149 19. PCT_ADAPTER: The percentage of PF reads that are unaligned and match to a known adapter sequence right from the start of the read.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
150
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
151 .. class:: warningmark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
152
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
153 **Warning on SAM/BAM quality**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
154
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
155 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
156 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
157 to be the only way to deal with SAM/BAM that cannot be parsed.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
158
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
159
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
160 </help>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
161 </tool>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
162