comparison picard_EstimateLibraryComplexity.xml @ 13:7e6fd3d0f16e draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit bf94a1505c131fb3f67c867b6e1d886780efa42e
author devteam
date Tue, 06 Dec 2016 10:04:41 -0500
parents 05087b27692a
children 465cbb0cf2eb
comparison
equal deleted inserted replaced
12:05087b27692a 13:7e6fd3d0f16e
4 <import>picard_macros.xml</import> 4 <import>picard_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
8 @java_options@ 8 @java_options@
9 9 @symlink_element_identifier@
10 picard 10 picard
11 EstimateLibraryComplexity 11 EstimateLibraryComplexity
12 12
13 INPUT="${inputFile}" 13 INPUT='$inputFile.element_identifier'
14 OUTPUT="${outFile}" 14 OUTPUT="${outFile}"
15 15
16 MIN_IDENTICAL_BASES="${min_identical_bases}" 16 MIN_IDENTICAL_BASES="${min_identical_bases}"
17 MAX_DIFF_RATE="${max_diff_rate}" 17 MAX_DIFF_RATE="${max_diff_rate}"
18 MIN_MEAN_QUALITY="${min_mean_quality}" 18 MIN_MEAN_QUALITY="${min_mean_quality}"
19 MAX_GROUP_RATIO="${max_group_ratio}" 19 MAX_GROUP_RATIO="${max_group_ratio}"
20 #import pipes 20 #import pipes
21 READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" } 21 READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" }
22 OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}" 22 OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}"
23 23
24 VALIDATION_STRINGENCY="${validation_stringency}" 24 VALIDATION_STRINGENCY="${validation_stringency}"
25 QUIET=true 25 QUIET=true
26 VERBOSITY=ERROR 26 VERBOSITY=ERROR
27 27
28 ]]></command> 28 ]]></command>
29 <inputs> 29 <inputs>
30 <param format="bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" /> 30 <param format="bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" />
31 <param name="min_identical_bases" type="integer" value="5" label="The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection" help="MIN_IDENTICAL_BASES; In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU; default=5"/> 31 <param name="min_identical_bases" type="integer" value="5" label="The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection" help="MIN_IDENTICAL_BASES; In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU; default=5"/>
32 <param name="max_diff_rate" type="float" value="0.03" label="The maximum rate of differences between two reads to call them identical" help="MAX_DIFF_RATE; default=0.03"/> 32 <param name="max_diff_rate" type="float" value="0.03" label="The maximum rate of differences between two reads to call them identical" help="MAX_DIFF_RATE; default=0.03"/>
40 </sanitizer> 40 </sanitizer>
41 </param> 41 </param>
42 <param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500" label="The maximum offset between two duplicate clusters in order to consider them optical duplicates" help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/> 42 <param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500" label="The maximum offset between two duplicate clusters in order to consider them optical duplicates" help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/>
43 43
44 <expand macro="VS" /> 44 <expand macro="VS" />
45 45
46 </inputs> 46 </inputs>
47 47
48 <outputs> 48 <outputs>
49 <data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Library complexity report"/> 49 <data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Library complexity report"/>
50 </outputs> 50 </outputs>
51 51
52 <tests> 52 <tests>
53 <test> 53 <test>
54 <param name="inputFile" value="picard_EstimateLibraryComplexity.bam" ftype="bam"/> 54 <param name="inputFile" value="picard_EstimateLibraryComplexity.bam" ftype="bam"/>
55 <param name="min_identical_bases" value="5"/> 55 <param name="min_identical_bases" value="5"/>
56 <param name="max_diff_rate" value="0.03"/> 56 <param name="max_diff_rate" value="0.03"/>
60 <param name="max_group_ratio" value="500"/> 60 <param name="max_group_ratio" value="500"/>
61 <param name="validation_stringency" value="LENIENT"/> 61 <param name="validation_stringency" value="LENIENT"/>
62 <output name="outFile" file="picard_EstimateLibraryComplexity_test1.tab" ftype="tabular" lines_diff="4"/> 62 <output name="outFile" file="picard_EstimateLibraryComplexity_test1.tab" ftype="tabular" lines_diff="4"/>
63 </test> 63 </test>
64 </tests> 64 </tests>
65 65
66 66
67 <help> 67 <help>
68 68
69 **Purpose** 69 **Purpose**
70 70
71 Attempts to estimate library complexity from sequence of read pairs alone. Does so by sorting all reads by the first N bases (5 by default) 71 Attempts to estimate library complexity from sequence of read pairs alone. Does so by sorting all reads by the first N bases (5 by default)
84 84
85 @dataset_collections@ 85 @dataset_collections@
86 86
87 @description@ 87 @description@
88 88
89 MIN_IDENTICAL_BASES=Integer The minimum number of bases at the starts of reads that must be identical for reads to be 89 MIN_IDENTICAL_BASES=Integer The minimum number of bases at the starts of reads that must be identical for reads to be
90 grouped together for duplicate detection. In effect total_reads / 4^max_id_bases reads 90 grouped together for duplicate detection. In effect total_reads / 4^max_id_bases reads
91 will be compared at a time, so lower numbers will produce more accurate results but 91 will be compared at a time, so lower numbers will produce more accurate results but
92 consume exponentially more memory and CPU. Default value: 5. 92 consume exponentially more memory and CPU. Default value: 5.
93 93
94 MAX_DIFF_RATE=Double The maximum rate of differences between two reads to call them identical. Default value: 94 MAX_DIFF_RATE=Double The maximum rate of differences between two reads to call them identical. Default value:
95 0.03. 95 0.03.
96 96
97 MIN_MEAN_QUALITY=Integer The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads 97 MIN_MEAN_QUALITY=Integer The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads
98 with lower average quality are filtered out and not considered in any calculations. 98 with lower average quality are filtered out and not considered in any calculations.
99 Default value: 20. 99 Default value: 20.
100 100
101 MAX_GROUP_RATIO=Integer Do not process self-similar groups that are this many times over the mean expected group 101 MAX_GROUP_RATIO=Integer Do not process self-similar groups that are this many times over the mean expected group
102 size. I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then 102 size. I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then
103 the mean expected group size would be approximately 10 reads. Default value: 500. 103 the mean expected group size would be approximately 10 reads. Default value: 500.
104 104
105 READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read 105 READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read
106 names are parsed to extract three variables: tile/region, x coordinate and y coordinate. 106 names are parsed to extract three variables: tile/region, x coordinate and y coordinate.
107 These values are used to estimate the rate of optical duplication in order to give a more 107 These values are used to estimate the rate of optical duplication in order to give a more
108 accurate estimated library size. Set this option to null to disable optical duplicate 108 accurate estimated library size. Set this option to null to disable optical duplicate
109 detection. The regular expression should contain three capture groups for the three 109 detection. The regular expression should contain three capture groups for the three
110 variables, in order. It must match the entire read name. Note that if the default regex 110 variables, in order. It must match the entire read name. Note that if the default regex
111 is specified, a regex match is not actually done, but instead the read name is split on 111 is specified, a regex match is not actually done, but instead the read name is split on
112 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be 112 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be
113 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements 113 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements
114 are assumed to be tile, x and y values. Default value: 114 are assumed to be tile, x and y values. Default value:
115 [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. 115 [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
116 116
117 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer 117 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer
118 The maximum offset between two duplicate clusters in order to consider them optical 118 The maximum offset between two duplicate clusters in order to consider them optical
119 duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) 119 duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels)
120 unless using later versions of the Illumina pipeline that multiply pixel values by 10, in 120 unless using later versions of the Illumina pipeline that multiply pixel values by 10, in
121 which case 50-100 is more normal. Default value: 100. 121 which case 50-100 is more normal. Default value: 100.
122 122
123 123
124 @more_info@ 124 @more_info@
125 125
126 </help> 126 </help>
127 </tool> 127 </tool>