annotate rgPicardMarkDups.xml @ 4:f4d018471628 draft default tip

Uploaded
author jpruab
date Tue, 13 Aug 2013 12:09:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
f4d018471628 Uploaded
jpruab
parents:
diff changeset
1 <tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="1.56.0">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
2 <command interpreter="python">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
3 picard_wrapper.py -i "${input_file}" -n "${out_prefix}" --tmpdir "${__new_file_path__}" -o "${out_file}"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
4 --remdups "${remDups}" --assumesorted "${assumeSorted}" --readregex "${readRegex}" --optdupdist "${optDupeDist}"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
5 -j "\$JAVA_JAR_PATH/MarkDuplicates.jar" -d "${html_file.files_path}" -t "${html_file}" -e "${input_file.ext}"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
6 </command>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
7 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
8 <inputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
9 <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
10 help="If empty, upload or import a SAM/BAM dataset."/>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
11 <param name="out_prefix" value="Dupes Marked" type="text"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
12 label="Title for the output file" help="Use this to remind you what the job was for" size="80" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
13 <param name="remDups" value="false" type="boolean" label="Remove duplicates from output file"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
14 truevalue="true" falsevalue="false" checked="yes"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
15 help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
16 <param name="assumeSorted" value="true" type="boolean" label="Assume reads are already ordered"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
17 truevalue="true" falsevalue="false" checked="yes"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
18 help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
19 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
20 label="Regular expression that can be used to parse read names in the incoming SAM file"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
21 help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
f4d018471628 Uploaded
jpruab
parents:
diff changeset
22 <sanitizer>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
23 <valid initial="string.printable">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
24 <remove value="&apos;"/>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
25 </valid>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
26 <mapping initial="none">
f4d018471628 Uploaded
jpruab
parents:
diff changeset
27 <add source="&apos;" target="__sq__"/>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
28 </mapping>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
29 </sanitizer>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
30 </param>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
31 <param name="optDupeDist" value="100" type="integer"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
32 label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
33 help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
f4d018471628 Uploaded
jpruab
parents:
diff changeset
34 <validator type="in_range" message="Minimum optical dupe distance must be positive" min="0" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
35 </param>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
36
f4d018471628 Uploaded
jpruab
parents:
diff changeset
37 </inputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
38 <outputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
39 <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
40 <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
41 </outputs>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
42 <tests>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
43 <test>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
44 <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
45 <param name="out_prefix" value="Dupes Marked" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
46 <param name="remDups" value="false" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
47 <param name="assumeSorted" value="true" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
48 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
49 <param name="optDupeDist" value="100" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
50 <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
51 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
52 </test>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
53 <test>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
54 <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
55 <param name="out_prefix" value="Dupes Marked" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
56 <param name="remDups" value="true" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
57 <param name="assumeSorted" value="true" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
58 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
59 <param name="optDupeDist" value="100" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
60 <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
61 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
f4d018471628 Uploaded
jpruab
parents:
diff changeset
62 </test>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
63 </tests>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
64
f4d018471628 Uploaded
jpruab
parents:
diff changeset
65 <help>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
66
f4d018471628 Uploaded
jpruab
parents:
diff changeset
67 .. class:: infomark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
68
f4d018471628 Uploaded
jpruab
parents:
diff changeset
69 **Purpose**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
70
f4d018471628 Uploaded
jpruab
parents:
diff changeset
71 Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
72
f4d018471628 Uploaded
jpruab
parents:
diff changeset
73 **Picard documentation**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
74
f4d018471628 Uploaded
jpruab
parents:
diff changeset
75 This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
76
f4d018471628 Uploaded
jpruab
parents:
diff changeset
77 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
f4d018471628 Uploaded
jpruab
parents:
diff changeset
78
f4d018471628 Uploaded
jpruab
parents:
diff changeset
79 -----
f4d018471628 Uploaded
jpruab
parents:
diff changeset
80
f4d018471628 Uploaded
jpruab
parents:
diff changeset
81 .. class:: infomark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
82
f4d018471628 Uploaded
jpruab
parents:
diff changeset
83 **Inputs, outputs, and parameters**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
84
f4d018471628 Uploaded
jpruab
parents:
diff changeset
85 Picard documentation says (reformatted for Galaxy):
f4d018471628 Uploaded
jpruab
parents:
diff changeset
86
f4d018471628 Uploaded
jpruab
parents:
diff changeset
87 .. csv-table:: Mark Duplicates docs
f4d018471628 Uploaded
jpruab
parents:
diff changeset
88 :header-rows: 1
f4d018471628 Uploaded
jpruab
parents:
diff changeset
89
f4d018471628 Uploaded
jpruab
parents:
diff changeset
90 Option,Description
f4d018471628 Uploaded
jpruab
parents:
diff changeset
91 "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
92 "OUTPUT=File","The output file to write marked records to. Required."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
93 "METRICS_FILE=File","File to write duplication metrics to. Required."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
94 "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
95 "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
96 "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
97 "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
f4d018471628 Uploaded
jpruab
parents:
diff changeset
98 "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
f4d018471628 Uploaded
jpruab
parents:
diff changeset
99 "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
f4d018471628 Uploaded
jpruab
parents:
diff changeset
100
f4d018471628 Uploaded
jpruab
parents:
diff changeset
101 .. class:: warningmark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
102
f4d018471628 Uploaded
jpruab
parents:
diff changeset
103 **Warning on SAM/BAM quality**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
104
f4d018471628 Uploaded
jpruab
parents:
diff changeset
105 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
106 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
f4d018471628 Uploaded
jpruab
parents:
diff changeset
107 to be the only way to deal with SAM/BAM that cannot be parsed.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
108 .. class:: infomark
f4d018471628 Uploaded
jpruab
parents:
diff changeset
109
f4d018471628 Uploaded
jpruab
parents:
diff changeset
110 **Note on the Regular Expression**
f4d018471628 Uploaded
jpruab
parents:
diff changeset
111
f4d018471628 Uploaded
jpruab
parents:
diff changeset
112 (from the Picard docs)
f4d018471628 Uploaded
jpruab
parents:
diff changeset
113 This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: ``[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*``
f4d018471628 Uploaded
jpruab
parents:
diff changeset
114
f4d018471628 Uploaded
jpruab
parents:
diff changeset
115 Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged unless the remove duplicates option is selected. In some cases you may want to do this, but please only do this if you really understand what you are doing.
f4d018471628 Uploaded
jpruab
parents:
diff changeset
116
f4d018471628 Uploaded
jpruab
parents:
diff changeset
117 </help>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
118 </tool>
f4d018471628 Uploaded
jpruab
parents:
diff changeset
119
f4d018471628 Uploaded
jpruab
parents:
diff changeset
120
f4d018471628 Uploaded
jpruab
parents:
diff changeset
121
f4d018471628 Uploaded
jpruab
parents:
diff changeset
122
f4d018471628 Uploaded
jpruab
parents:
diff changeset
123
f4d018471628 Uploaded
jpruab
parents:
diff changeset
124
f4d018471628 Uploaded
jpruab
parents:
diff changeset
125
f4d018471628 Uploaded
jpruab
parents:
diff changeset
126
f4d018471628 Uploaded
jpruab
parents:
diff changeset
127
f4d018471628 Uploaded
jpruab
parents:
diff changeset
128
f4d018471628 Uploaded
jpruab
parents:
diff changeset
129
f4d018471628 Uploaded
jpruab
parents:
diff changeset
130