annotate picard_CollectInsertSizeMetrics.xml @ 6:4ff1e04010e6 draft

Uploaded
author devteam
date Thu, 15 Jan 2015 15:37:04 -0500
parents 3d4f1fa26f0e
children 3a3234d7a2e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
1 <tool name="CollectInsertSizeMetrics" id="picard_CollectInsertSizeMetrics" version="1.126.0">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
2 <description>plots distribution of insert sizes</description>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
3 <requirements>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
4 <requirement type="package" version="1.126.0">picard</requirement>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
5 </requirements>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
6
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
7 <macros>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
8 <import>picard_macros.xml</import>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
9 </macros>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
10
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
11 <command>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
12 @java_options@
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
13 ##set up input files
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
14
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
15 #set $reference_fasta_filename = "localref.fa"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
16
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
17 #if str( $reference_source.reference_source_selector ) == "history":
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
18 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
19 #else:
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
20 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
21 #end if
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
22
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
23 java -jar \$JAVA_JAR_PATH/picard.jar
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
24 CollectInsertSizeMetrics
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
25 INPUT="${inputFile}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
26 OUTPUT="${outFile}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
27 HISTOGRAM_FILE="${histFile}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
28 DEVIATIONS="${deviations}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
29
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
30 #if str( $hist_width ):
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
31 HISTOGRAM_WIDTH="${hist_width}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
32 #end if
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
33
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
34 MINIMUM_PCT="${min_pct}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
35 REFERENCE_SEQUENCE="${reference_fasta_filename}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
36 ASSUME_SORTED="${assume_sorted}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
37 METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
38
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
39 VALIDATION_STRINGENCY="${validation_stringency}"
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
40 QUIET=true
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
41 VERBOSITY=ERROR
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
42
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
43 </command>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
44 <inputs>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
45 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset."/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
46 <conditional name="reference_source">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
47 <param name="reference_source_selector" type="select" label="Load reference genome from">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
48 <option value="cached">Local cache</option>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
49 <option value="history">History</option>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
50 </param>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
51 <when value="cached">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
52 <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
53 <options from_data_table="all_fasta">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
54 </options>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
55 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
56 </param>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
57 </when>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
58 <when value="history">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
59 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
60 </when>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
61 </conditional>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
62 <param name="deviations" type="float" value="10.0" label="Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION" help="DEVIATIONS; This option is offered because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and SD grossly misleading regarding the real distribution. default=10.0"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
63 <param name="hist_width" type="integer" optional="True" label="Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail" help="HISTOGRAM_WIDTH; optional"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
64 <param name="min_pct" type="float" value="0.05" label="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads" help="MINIMUM_PCT; (Range: 0 to 1). default=0.05. "/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
65 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
66 <param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL">
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
67 <option value="ALL_READS" selected="True">All reads</option>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
68 <option value="SAMPLE">Sample</option>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
69 <option value="LIBRARY">Library</option>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
70 <option value="READ_GROUP">Read group</option>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
71 </param>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
72
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
73 <expand macro="VS" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
74
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
75 </inputs>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
76
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
77 <outputs>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
78 <data format="tabular" name="outFile"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
79 <data format="pdf" name="histFile"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
80 </outputs>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
81
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
82 <tests>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
83 <test>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
84 <param name="metric_accumulation_level" value="ALL_READS"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
85 <param name="deviations" value="10.0" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
86 <param name="hist_width" value="500" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
87 <param name="min_pct" value="0.05" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
88 <param name="assume_sorted" value="true" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
89 <param name="reference_source_selector" value="history" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
90 <param name="ref_file" value="picard_CollectInsertSizeMetrics_ref.fa" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
91 <param name="inputFile" value="picard_CollectInsertSizeMetrics.bam" ftype="bam" />
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
92 <output name="outFile" file="picard_CollectInsertSizeMetrics_test1.tab" ftype="tabular" lines_diff="4"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
93 </test>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
94 </tests>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
95
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
96 <stdio>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
97 <exit_code range="1:" level="fatal"/>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
98 </stdio>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
99
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
100 <help>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
101
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
102 .. class:: infomark
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
103
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
104 **Purpose**
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
105
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
106 Reads a SAM or BAM dataset and writes a file containing metrics about the statistical distribution of insert size (excluding duplicates) and generates a Histogram plot.
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
107
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
108 @dataset_collections@
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
109
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
110 @description@
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
111
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
112
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
113 DEVIATIONS=Double Generate mean, sd and plots by trimming the data down to MEDIAN +
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
114 DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
115 includes enough anomalous values from chimeras and other artifacts to make the mean and
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
116 sd grossly misleading regarding the real distribution. Default value: 10.0.
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
117
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
118 HISTOGRAM_WIDTH=Integer
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
119 W=Integer Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail.
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
120 Also, when calculating mean and standard deviation, only bins &lt;= HISTOGRAM_WIDTH will be
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
121 included. Default value: not set.
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
122
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
123 MINIMUM_PCT=Float
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
124 M=Float When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
125 have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05.
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
126
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
127 METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
128 LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
129 LIBRARY, READ_GROUP} This option may be specified 0 or more times.
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
130
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
131 ASSUME_SORTED=Boolean
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
132 AS=Boolean If true (default), then the sort order in the header file will be ignored. Default
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
133 value: true. This option can be set to 'null' to clear the default value. Possible
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
134 values: {true, false}
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
135
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
136 @more_info@
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
137
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
138 </help>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
139 </tool>
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
140
3d4f1fa26f0e Uploaded
devteam
parents:
diff changeset
141