Mercurial > repos > devteam > picard
comparison picard_MarkDuplicates.xml @ 18:7615ac66c6e5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/picard commit 3ce5dea3af8f4816b4a83914b53402aa84e08fca
author | iuc |
---|---|
date | Sat, 20 Jan 2018 08:28:24 -0500 |
parents | fc288950c3b7 |
children | 5053a18d9bc8 |
comparison
equal
deleted
inserted
replaced
17:fc288950c3b7 | 18:7615ac66c6e5 |
---|---|
1 <tool name="MarkDuplicates" id="picard_MarkDuplicates" version="@TOOL_VERSION@.1"> | 1 <tool name="MarkDuplicates" id="picard_MarkDuplicates" version="@TOOL_VERSION@.2"> |
2 <description>examine aligned records in BAM datasets to locate duplicate molecules</description> | 2 <description>examine aligned records in BAM datasets to locate duplicate molecules</description> |
3 <macros> | 3 <macros> |
4 <import>picard_macros.xml</import> | 4 <import>picard_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements" /> | 6 <expand macro="requirements" /> |
22 ASSUME_SORTED='${assume_sorted}' | 22 ASSUME_SORTED='${assume_sorted}' |
23 | 23 |
24 DUPLICATE_SCORING_STRATEGY='${duplicate_scoring_strategy}' | 24 DUPLICATE_SCORING_STRATEGY='${duplicate_scoring_strategy}' |
25 | 25 |
26 #import pipes | 26 #import pipes |
27 READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" } | 27 #if $read_name_regex: |
28 READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) } | |
29 #end if | |
28 OPTICAL_DUPLICATE_PIXEL_DISTANCE='${optical_duplicate_pixel_distance}' | 30 OPTICAL_DUPLICATE_PIXEL_DISTANCE='${optical_duplicate_pixel_distance}' |
29 | 31 |
30 # Optional arguments | 32 # Optional arguments |
31 #if $barcode_tag: | 33 #if $barcode_tag: |
32 BARCODE_TAG='${barcode_tag}' | 34 BARCODE_TAG='${barcode_tag}' |
33 #end if | 35 #end if |
34 | 36 |
35 VALIDATION_STRINGENCY='${validation_stringency}' | 37 VALIDATION_STRINGENCY='${validation_stringency}' |
36 QUIET=true | 38 QUIET=true |
37 VERBOSITY=ERROR | 39 VERBOSITY=ERROR |
48 <param name="duplicate_scoring_strategy" type="select" label="The scoring strategy for choosing the non-duplicate among candidates" help="DUPLICATE_SCORING_STRATEGY; default=SUM_OF_BASE_QUALITIES"> | 50 <param name="duplicate_scoring_strategy" type="select" label="The scoring strategy for choosing the non-duplicate among candidates" help="DUPLICATE_SCORING_STRATEGY; default=SUM_OF_BASE_QUALITIES"> |
49 <option value="SUM_OF_BASE_QUALITIES">SUM_OF_BASE_QUALITIES</option> | 51 <option value="SUM_OF_BASE_QUALITIES">SUM_OF_BASE_QUALITIES</option> |
50 <option value="TOTAL_MAPPED_REFERENCE_LENGTH">TOTAL_MAPPED_REFERENCE_LENGTH</option> | 52 <option value="TOTAL_MAPPED_REFERENCE_LENGTH">TOTAL_MAPPED_REFERENCE_LENGTH</option> |
51 </param> | 53 </param> |
52 | 54 |
53 | 55 <param name="read_name_regex" type="text" value="" label="Regular expression that can be used in unusual situations to parse non-standard read names in the incoming SAM/BAM dataset" help="READ_NAME_REGEX; Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. See help below for more info; default='' (uses : separation)"> |
54 <param name="read_name_regex" type="text" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*." label="Regular expression that can be used to parse read names in the incoming SAM/BAM dataset" help="READ_NAME_REGEX; Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. See help below for more info; default=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*."> | 56 |
55 <sanitizer> | 57 <sanitizer> |
56 <valid initial="string.printable"> | 58 <valid initial="string.printable"> |
57 </valid> | 59 </valid> |
58 </sanitizer> | 60 </sanitizer> |
59 </param> | 61 </param> |
74 <test> | 76 <test> |
75 <param name="inputFile" value="picard_MarkDuplicates.bam" ftype="bam"/> | 77 <param name="inputFile" value="picard_MarkDuplicates.bam" ftype="bam"/> |
76 <param name="comment" value="test-run"/> | 78 <param name="comment" value="test-run"/> |
77 <param name="assume_sorted" value="True"/> | 79 <param name="assume_sorted" value="True"/> |
78 <param name="remove_duplicates" value="True"/> | 80 <param name="remove_duplicates" value="True"/> |
79 <param name="read_name_regex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*."/> | 81 <param name="read_name_regex" value=".*[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*"/> |
80 <param name="optical_duplicate_pixel_distance" value="100"/> | 82 <param name="optical_duplicate_pixel_distance" value="100"/> |
81 <param name="duplicate_scoring_strategy" value="SUM_OF_BASE_QUALITIES"/> | 83 <param name="duplicate_scoring_strategy" value="SUM_OF_BASE_QUALITIES"/> |
82 <param name="validation_stringency" value="LENIENT"/> | 84 <param name="validation_stringency" value="LENIENT"/> |
83 <output name="outFile" file="picard_MarkDuplicates_test1.bam" ftype="bam" lines_diff="4"/> | 85 <output name="outFile" file="picard_MarkDuplicates_test1.bam" ftype="bam" lines_diff="4"/> |
86 </test> | |
87 <test> | |
88 <param name="inputFile" value="picard_MarkDuplicates.bam" ftype="bam"/> | |
89 <param name="comment" value="test-run"/> | |
90 <param name="assume_sorted" value="True"/> | |
91 <param name="remove_duplicates" value="False"/> | |
92 <param name="read_name_regex" value=""/> | |
93 <param name="optical_duplicate_pixel_distance" value="100"/> | |
94 <param name="duplicate_scoring_strategy" value="SUM_OF_BASE_QUALITIES"/> | |
95 <param name="validation_stringency" value="LENIENT"/> | |
96 <output name="outFile" file="picard_MarkDuplicates_test2.bam" ftype="bam" lines_diff="4"/> | |
84 </test> | 97 </test> |
85 </tests> | 98 </tests> |
86 | 99 |
87 | 100 |
88 <help> | 101 <help> |
100 more times. | 113 more times. |
101 | 114 |
102 REMOVE_DUPLICATES=Boolean If true do not write duplicates to the output file instead of writing them with | 115 REMOVE_DUPLICATES=Boolean If true do not write duplicates to the output file instead of writing them with |
103 appropriate flags set. Default value: false. | 116 appropriate flags set. Default value: false. |
104 | 117 |
105 READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read | 118 READ_NAME_REGEX=String This option is only needed if your read names do not follow a standard Illumina convention |
119 of colon separation but do contain tile, x, and y coordinates (unusual). | |
120 A regular expression that can be used to parse read names in the incoming SAM file. Read | |
106 names are parsed to extract three variables: tile/region, x coordinate and y coordinate. | 121 names are parsed to extract three variables: tile/region, x coordinate and y coordinate. |
107 These values are used to estimate the rate of optical duplication in order to give a more | 122 These values are used to estimate the rate of optical duplication in order to give a more |
108 accurate estimated library size. Set this option to null to disable optical duplicate | 123 accurate estimated library size. Set this option to null to disable optical duplicate |
109 detection. The regular expression should contain three capture groups for the three | 124 detection. The regular expression should contain three capture groups for the three |
110 variables, in order. It must match the entire read name. Note that if the default regex | 125 variables, in order. It must match the entire read name. Note that if the default regex |
111 is specified, a regex match is not actually done, but instead the read name is split on | 126 is specified, a regex match is not actually done, but instead the read name is split on |
112 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be | 127 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be |
113 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements | 128 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements |
114 are assumed to be tile, x and y values. Default value: | 129 are assumed to be tile, x and y values. Default value: '' |
115 [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. | 130 |
116 | 131 |
117 DUPLICATE_SCORING_STRATEGY=ScoringStrategy | 132 DUPLICATE_SCORING_STRATEGY=ScoringStrategy |
118 DS=ScoringStrategy The scoring strategy for choosing the non-duplicate among candidates. Default value: | 133 DS=ScoringStrategy The scoring strategy for choosing the non-duplicate among candidates. Default value: |
119 SUM_OF_BASE_QUALITIES. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH} | 134 SUM_OF_BASE_QUALITIES. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH} |
120 | 135 |
121 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer | 136 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer |
122 The maximum offset between two duplicte clusters in order to consider them optical | 137 The maximum offset between two duplicate clusters in order to consider them optical |
123 duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) | 138 duplicates. This should be set to 100 for (circa 2011+) read names and typical flowcells. |
124 unless using later versions of the Illumina pipeline that multiply pixel values by 10, in | 139 Structured flow cells (NovaSeq, HiSeq 4000, X) should use ~2500. |
125 which case 50-100 is more normal. Default value: 100. | 140 For older conventions, distances could be set to some fairly small number (e.g. 5-10 pixels). |
141 Default value: 100. | |
126 | 142 |
127 BARCODE_TAG=String Barcode SAM tag (ex. BC for 10X Genomics) Default value: null. | 143 BARCODE_TAG=String Barcode SAM tag (ex. BC for 10X Genomics) Default value: null. |
128 | 144 |
129 @more_info@ | 145 @more_info@ |
130 | 146 |