Mercurial > repos > devteam > picard
comparison picard_SamToFastq.xml @ 5:3d4f1fa26f0e draft
Uploaded
author | devteam |
---|---|
date | Tue, 16 Dec 2014 19:03:21 -0500 |
parents | 9227b8c3093b |
children | 3a3234d7a2e8 |
comparison
equal
deleted
inserted
replaced
4:ab1f60c26526 | 5:3d4f1fa26f0e |
---|---|
1 <tool id="picard_SamToFastq" name="SAM to FASTQ" version="1.56.1" force_history_refresh="True"> | 1 <tool name="SamToFastq" id="picard_SamToFastq" version="1.126.0"> |
2 <description>creates a FASTQ file</description> | 2 <description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description> |
3 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements> | 3 <requirements> |
4 <!-- Dan Blankenberg --> | 4 <requirement type="package" version="1.126.0">picard</requirement> |
5 <command interpreter="python">picard_SamToFastq_wrapper.py | 5 </requirements> |
6 -p ' | 6 |
7 java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC | 7 <macros> |
8 -jar "\$JAVA_JAR_PATH/SamToFastq.jar" | 8 <import>picard_macros.xml</import> |
9 INPUT="${input_sam}" | 9 </macros> |
10 VALIDATION_STRINGENCY="LENIENT" | 10 |
11 <command> | |
12 | |
13 echo "BAM" > $report && ## This is necessary for output dataset detection (see output tags below) | |
14 | |
15 @java_options@ | |
16 | |
17 java -jar \$JAVA_JAR_PATH/picard.jar | |
18 SamToFastq | |
19 | |
20 INPUT="${inputFile}" | |
21 | |
22 #if str( $output_per_rg ) == "true": | |
23 OUTPUT_PER_RG=true | |
24 OUTPUT_DIR=. | |
25 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false": | |
26 FASTQ=READ1.fastq | |
27 SECOND_END_FASTQ=READ2.fastq | |
28 UNPAIRED_FASTQ=UNPAIRED_READS.fastq | |
29 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true": | |
30 FASTQ=INTERLEAVED.fastq | |
31 #end if | |
32 | |
11 RE_REVERSE="${re_reverse}" | 33 RE_REVERSE="${re_reverse}" |
34 INTERLEAVE="${interleave}" | |
12 INCLUDE_NON_PF_READS="${include_non_pf_reads}" | 35 INCLUDE_NON_PF_READS="${include_non_pf_reads}" |
13 #if str( $clipping_attribute ): | 36 CLIPPING_ATTRIBUTE="${clipping_attribute}" |
14 CLIPPING_ATTRIBUTE="${clipping_attribute}" | 37 CLIPPING_ACTION="${clipping_action}" |
38 READ1_TRIM="${read1_trim}" | |
39 | |
40 #if int($read1_max_bases_to_write) > -1: | |
41 READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}" | |
15 #end if | 42 #end if |
16 #if str( $clipping_action ): | 43 |
17 CLIPPING_ACTION="${clipping_action}" | 44 READ2_TRIM="${read2_trim}" |
45 | |
46 #if int($read2_max_bases_to_write) > -1: | |
47 READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}" | |
18 #end if | 48 #end if |
19 #if str( $read1_trim ): | 49 |
20 READ1_TRIM="${read1_trim}" | |
21 #end if | |
22 #if str( $read1_max_bases_to_write ): | |
23 READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}" | |
24 #end if | |
25 INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}" | 50 INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}" |
26 | 51 |
27 #if str( $output_per_read_group_selector ) == 'per_sam_file': | 52 |
28 ##OUTPUT_PER_RG=false | 53 VALIDATION_STRINGENCY="${validation_stringency}" |
29 FASTQ="${output_fastq1}" | 54 QUIET=true |
30 | 55 VERBOSITY=ERROR |
31 #if str( $single_paired_end_type.single_paired_end_type_selector ) == 'paired': | 56 |
32 SECOND_END_FASTQ="${output_fastq2}" | |
33 #if str( $single_paired_end_type.read2_trim ): | |
34 READ2_TRIM="${single_paired_end_type.read2_trim}" | |
35 #end if | |
36 #if str( $single_paired_end_type.read2_max_bases_to_write ): | |
37 READ2_MAX_BASES_TO_WRITE="${single_paired_end_type.read2_max_bases_to_write}" | |
38 #end if | |
39 #end if | |
40 ' | |
41 #else: | |
42 OUTPUT_PER_RG=true | |
43 #if str( $single_paired_end_type.single_paired_end_type_selector ) == 'paired': | |
44 ' | |
45 --read_group_file_2 "${output_fastq2}" | |
46 --file_id_2 "${output_fastq2.id}" | |
47 -p ' | |
48 #if str( $single_paired_end_type.read2_trim ): | |
49 READ2_TRIM="${single_paired_end_type.read2_trim}" | |
50 #end if | |
51 #if str( $single_paired_end_type.read2_max_bases_to_write ): | |
52 READ2_MAX_BASES_TO_WRITE="${single_paired_end_type.read2_max_bases_to_write}" | |
53 #end if | |
54 #end if | |
55 ' | |
56 --read_group_file_1 "${output_fastq1}" | |
57 --new_files_path "${__new_file_path__}" | |
58 --file_id_1 "${output_fastq1.id}" | |
59 #end if | |
60 </command> | 57 </command> |
61 <inputs> | 58 <inputs> |
62 <param name="input_sam" type="data" format="sam,bam" label="BAM/SAM file" /> | 59 |
63 <param name="read1_trim" type="integer" value="" optional="True" label="The number of bases to trim from the beginning of read 1." /> | 60 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/> |
64 <param name="read1_max_bases_to_write" type="integer" optional="True" value="" label="The maximum number of bases to write from read 1 after trimming." /> | 61 <param name="output_per_rg" type="boolean" checked="False" label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)" help="OUTPUT_PER_RG; default=False"/> |
65 <param name="output_per_read_group_selector" type="select" label="Output per read group"> | 62 <param name="re_reverse" type="boolean" checked="True" label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq" help="RE_REVERSE; default=True"/> |
66 <option value="per_sam_file" selected="True">Per BAM/SAM file</option> | 63 <param name="interleave" type="boolean" label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from" help="INTERLEAVE; default=False"/> |
67 <option value="per_read_group">Per Read Group</option> | 64 <param name="include_non_pf_reads" type="boolean" label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ" help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/> |
68 </param> | 65 <param name="clipping_attribute" type="text" size="4" value="null" label="The attribute that stores the position at which the SAM/BAM record should be clipped" help="CLIPPING_ATTRIBUTE; default=null"/> |
69 <conditional name="single_paired_end_type"> | 66 <param name="clipping_action" type="text" size="10" value="null" label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region" help="CLIPPING_ACTION; default=null"/> |
70 <param name="single_paired_end_type_selector" type="select" label="Single or Paired end"> | 67 <param name="read1_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 1" help="READ1_TRIM; default=0"/> |
71 <option value="single" selected="True">Single</option> | 68 <param name="read1_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 1 after trimming" help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/> |
72 <option value="paired">Paired end</option> | 69 <param name="read2_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 2" help="READ2_TRIM; default=0"/> |
73 </param> | 70 <param name="read2_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 2 after trimming" help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/> |
74 <when value="single"> | 71 <param name="include_non_primary_alignments" type="boolean" label="If true, include non-primary alignments in the output" help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/> |
75 <!-- nothing yet --> | 72 |
76 </when> | 73 <expand macro="VS" /> |
77 <when value="paired"> | 74 |
78 <param name="read2_trim" type="integer" value="" optional="True" label="The number of bases to trim from the beginning of read 2." /> | 75 </inputs> |
79 <param name="read2_max_bases_to_write" type="integer" optional="True" value="" label="The maximum number of bases to write from read 2 after trimming." /> | 76 |
80 </when> | |
81 </conditional> | |
82 <param name="re_reverse" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Re-reverse bases and qualities of reads on negative strand"/> | |
83 <param name="include_non_pf_reads" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Include non-PF reads from the SAM file into the output FASTQ files."/> | |
84 <param name="clipping_attribute" type="text" value="" label="The attribute that stores the position at which the SAM record should be clipped" help="Leave blank for null" /> | |
85 <param name="clipping_action" type="text" value="" label="The action that should be taken with clipped reads" help="'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region. Leave blank for null" /> | |
86 <param name="include_non_primary_alignments" type="boolean" truevalue="true" falsevalue="false" checked="False" label="If true, include non-primary alignments in the output." help="Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments."/> | |
87 | |
88 </inputs> | |
89 <outputs> | 77 <outputs> |
90 <data format="fastqsanger" name="output_fastq1" label="${tool.name} on ${on_string}: FASTQ 1" /> | 78 <!-- here dataset discovery is based on fact that if OUTPUT_PER_RG=true this tool automatically adds .fastq extension to emitted files --> |
91 <data format="fastqsanger" name="output_fastq2" label="${tool.name} on ${on_string}: FASTQ 2" > | 79 <data format="txt" name="report" label="SamToFastq run" hidden="true"> |
92 <filter>single_paired_end_type['single_paired_end_type_selector'] == 'paired'</filter> | 80 <discover_datasets pattern="(?P<designation>.+)\.fastq" ext="fastqsanger" visible="true"/> |
93 </data> | 81 </data> |
94 </outputs> | 82 </outputs> |
83 | |
95 <tests> | 84 <tests> |
96 <test> | 85 <test> |
97 <param name="input_sam" value="bfast_out1.sam" ftype="sam" /> | 86 <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/> |
98 <param name="output_per_read_group_selector" value="per_sam_file" /> | 87 <param name="output_per_rg" value="false"/> |
99 <param name="single_paired_end_type_selector" value="single" /> | 88 <param name="re_reverse" value="true"/> |
100 <param name="read1_trim" value="" /> | 89 <param name="interleave" value="true"/> |
101 <param name="read1_max_bases_to_write" value="" /> | 90 <param name="include_non_pf_reads" value="false"/> |
102 <param name="re_reverse" value="True" /> | 91 <param name="clipping_attribute" value="null" /> |
103 <param name="include_non_pf_reads" value="False" /> | 92 <param name="clipping_action" value="null" /> |
104 <param name="clipping_action" value="" /> | 93 <param name="read1_trim" value="0" /> |
105 <param name="clipping_attribute" value="" /> | 94 <param name="read1_max_bases_to_write" value="-1"/> |
106 <param name="include_non_primary_alignments" value="False" /> | 95 <param name="read2_trim" value="0" /> |
107 <output name="output_fastq1" file="random_phiX_1.fastqsanger"/> | 96 <param name="read2_max_bases_to_write" value="-1"/> |
108 </test> | 97 <param name="include_non_primary_alignments" value="false"/> |
109 <test> | 98 <output name="report"> |
110 <param name="input_sam" value="bwa_wrapper_out3.sam" ftype="sam" /> | 99 <assert_contents> |
111 <param name="output_per_read_group_selector" value="per_sam_file" /> | 100 <has_line line="BAM" /> |
112 <param name="single_paired_end_type_selector" value="paired" /> | 101 </assert_contents> |
113 <param name="read1_trim" value="" /> | 102 <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/> |
114 <param name="read1_max_bases_to_write" value="" /> | 103 </output> |
115 <param name="read2_trim" value="" /> | 104 </test> |
116 <param name="read2_max_bases_to_write" value="" /> | |
117 <param name="re_reverse" value="True" /> | |
118 <param name="include_non_pf_reads" value="False" /> | |
119 <param name="clipping_action" value="" /> | |
120 <param name="clipping_attribute" value="" /> | |
121 <param name="include_non_primary_alignments" value="False" /> | |
122 <output name="output_fastq1" file="bwa_wrapper_in2.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file --> | |
123 <output name="output_fastq2" file="bwa_wrapper_in3.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file --> | |
124 </test> | |
125 <test> | |
126 <param name="input_sam" value="bwa_wrapper_out3.sam" ftype="sam" /> | |
127 <param name="output_per_read_group_selector" value="per_read_group" /> | |
128 <param name="single_paired_end_type_selector" value="paired" /> | |
129 <param name="read1_trim" value="" /> | |
130 <param name="read1_max_bases_to_write" value="" /> | |
131 <param name="read2_trim" value="" /> | |
132 <param name="read2_max_bases_to_write" value="" /> | |
133 <param name="re_reverse" value="True" /> | |
134 <param name="include_non_pf_reads" value="False" /> | |
135 <param name="clipping_action" value="" /> | |
136 <param name="clipping_attribute" value="" /> | |
137 <param name="include_non_primary_alignments" value="False" /> | |
138 <output name="output_fastq1" file="bwa_wrapper_in2.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file --> | |
139 <output name="output_fastq2" file="bwa_wrapper_in3.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file --> | |
140 </test> | |
141 </tests> | 105 </tests> |
106 | |
107 <stdio> | |
108 <exit_code range="1:" level="fatal"/> | |
109 </stdio> | |
110 | |
142 <help> | 111 <help> |
143 **What it does** | 112 |
144 | 113 **Purpose** |
145 Picard: SamToFastq converts SAM files to FASTQ files. | 114 |
146 | 115 Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer. |
147 Extracts read sequences and qualities from the input SAM/BAM file and writes them into the output file in Sanger fastq format. In the RC mode (default is True), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from input SAM file will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer. | 116 |
148 | 117 ----- |
149 ------ | 118 |
150 | 119 .. class:: warningmark |
151 Please cite the website "http://picard.sourceforge.net". | 120 |
152 | 121 **DANGER: Multiple Outputs** |
153 ------ | 122 |
154 | 123 Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing! |
155 | 124 |
156 **Input formats** | 125 @dataset_collections@ |
157 | 126 |
158 SamToFastq accepts SAM/BAM input files, see http://samtools.sourceforge.net for more details. | 127 @description@ |
159 | 128 |
160 ------ | 129 FASTQ=File |
161 | 130 F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq). |
162 **Outputs** | 131 Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) |
163 | 132 |
164 The output is in FASTQ format. If using Paired end data, 2 fastq files are created. | 133 SECOND_END_FASTQ=File |
165 | 134 F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null. |
166 ------- | 135 Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) |
167 | 136 |
168 **SamToFastq settings** | 137 UNPAIRED_FASTQ=File |
169 | 138 FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode Default |
170 This is a list of SamToFastq options:: | 139 value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) |
171 | 140 |
172 INPUT=File Input SAM/BAM file to extract reads from Required. | 141 OUTPUT_PER_RG=Boolean |
173 FASTQ=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq). Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) | 142 OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is |
174 SECOND_END_FASTQ=File Output fastq file (if paired, second end of the pair fastq). Default value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) | 143 paired). Default value: false. Possible values: {true, false} Cannot be used in |
175 OUTPUT_PER_RG=Boolean Output a fastq file per read group (two fastq files per read group if the group is paired). Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} Cannot be used in conjunction with option(s) SECOND_END_FASTQ (F2) FASTQ (F) | 144 conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F) |
176 OUTPUT_DIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true. Default value: null. | 145 |
177 RE_REVERSE=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false} | 146 OUTPUT_DIR=File |
178 INCLUDE_NON_PF_READS=Boolean Include non-PF reads from the SAM file into the output FASTQ files. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} | 147 ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true. |
179 CLIPPING_ATTRIBUTE=String The attribute that stores the position at which the SAM record should be clipped Default value: null. | 148 Default value: null. |
180 CLIPPING_ACTION=String The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region. Default value: null. | 149 |
181 READ1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0. This option can be set to 'null' to clear the default value. | 150 RE_REVERSE=Boolean |
182 READ1_MAX_BASES_TO_WRITE=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written. Default value: null. | 151 RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them |
183 READ2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0. This option can be set to 'null' to clear the default value. | 152 to fastq Default value: true. Possible values: {true, false} |
184 READ2_MAX_BASES_TO_WRITE=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written. Default value: null. | 153 |
185 INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean If true, include non-primary alignments in the output. Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} | 154 INTERLEAVE=Boolean |
186 | 155 INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe |
156 which end it came from Default value: false. Possible values: {true, false} | |
157 | |
158 INCLUDE_NON_PF_READS=Boolean | |
159 NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes | |
160 filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads. | |
161 Default value: false. Possible values: {true, false} | |
162 | |
163 CLIPPING_ATTRIBUTE=String | |
164 CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped Default | |
165 value: null. | |
166 | |
167 CLIPPING_ACTION=String | |
168 CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities | |
169 should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in | |
170 the clipped region; and any integer means that the base qualities should be set to that | |
171 value in the clipped region. Default value: null. | |
172 | |
173 READ1_TRIM=Integer | |
174 R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0. | |
175 | |
176 READ1_MAX_BASES_TO_WRITE=Integer | |
177 R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than | |
178 this many bases left after trimming, all will be written. If this value is null then all | |
179 bases left after trimming will be written. Default value: null. | |
180 | |
181 READ2_TRIM=Integer | |
182 R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0. | |
183 | |
184 READ2_MAX_BASES_TO_WRITE=Integer | |
185 R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than | |
186 this many bases left after trimming, all will be written. If this value is null then all | |
187 bases left after trimming will be written. Default value: null. | |
188 | |
189 INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean | |
190 If true, include non-primary alignments in the output. Support of non-primary alignments | |
191 in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and | |
192 there are paired reads with non-primary alignments. Default value: false. | |
193 Possible values: {true, false} | |
194 | |
195 @more_info@ | |
187 | 196 |
188 </help> | 197 </help> |
189 </tool> | 198 </tool> |
199 | |
200 |