Mercurial > repos > devteam > picard_122_up
comparison picard_SamToFastq.xml @ 0:b76a4f17bbbb draft
Uploaded
| author | devteam |
|---|---|
| date | Thu, 23 Oct 2014 11:31:30 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b76a4f17bbbb |
|---|---|
| 1 <tool name="SamToFastq" id="picard_SamToFastq" version="1.122.0"> | |
| 2 <description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.122.0">picard</requirement> | |
| 5 </requirements> | |
| 6 | |
| 7 <macros> | |
| 8 <import>picard_macros.xml</import> | |
| 9 </macros> | |
| 10 | |
| 11 <command> | |
| 12 ## Cheetah template that assembles the Picard SamToFastq command line; lines starting with "##" are template comments. | |
| 13 echo "BAM" > $report &amp;&amp; ## This is necessary for output dataset detection (see output tags below) | |
| 14 | |
| 15 @java_options@ | |
| 16 ## NOTE(review): the java options token above is not defined in this file; presumably it is supplied by the imported picard_macros.xml - confirm there. | |
| 17 java -jar \$JAVA_JAR_PATH/SamToFastq.jar | |
| 18 | |
| 19 INPUT="${inputFile}" | |
| 20 ## Output layout: per-read-group files in the current directory, separate READ1/READ2/UNPAIRED files, or a single interleaved file. | |
| 21 #if str( $output_per_rg ) == "true": | |
| 22 OUTPUT_PER_RG=true | |
| 23 OUTPUT_DIR=. | |
| 24 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false": | |
| 25 FASTQ=READ1.fastq | |
| 26 SECOND_END_FASTQ=READ2.fastq | |
| 27 UNPAIRED_FASTQ=UNPAIRED_READS.fastq | |
| 28 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true": | |
| 29 FASTQ=INTERLEAVED.fastq | |
| 30 #end if | |
| 31 | |
| 32 RE_REVERSE="${re_reverse}" | |
| 33 INTERLEAVE="${interleave}" | |
| 34 INCLUDE_NON_PF_READS="${include_non_pf_reads}" | |
| 35 CLIPPING_ATTRIBUTE="${clipping_attribute}" | |
| 36 CLIPPING_ACTION="${clipping_action}" | |
| 37 READ1_TRIM="${read1_trim}" | |
| 38 ## The max-bases options are passed only when set to 0 or more; the default of -1 leaves them off the command line. | |
| 39 #if int($read1_max_bases_to_write) > -1: | |
| 40 READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}" | |
| 41 #end if | |
| 42 | |
| 43 READ2_TRIM="${read2_trim}" | |
| 44 | |
| 45 #if int($read2_max_bases_to_write) > -1: | |
| 46 READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}" | |
| 47 #end if | |
| 48 | |
| 49 INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}" | |
| 50 | |
| 51 | |
| 52 VALIDATION_STRINGENCY="${validation_stringency}" | |
| 53 QUIET=true | |
| 54 VERBOSITY=ERROR | |
| 55 | |
| 56 </command> | |
| 57 <inputs> | |
| 58 <!-- Parameters referenced by name in the command template; each help string names the matching Picard option and its default. --> | |
| 59 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/> | |
| 60 <param name="output_per_rg" type="boolean" checked="False" label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)" help="OUTPUT_PER_RG; default=False"/> | |
| 61 <param name="re_reverse" type="boolean" checked="True" label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq" help="RE_REVERSE; default=True"/> | |
| 62 <param name="interleave" type="boolean" label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from" help="INTERLEAVE; default=False"/> | |
| 63 <param name="include_non_pf_reads" type="boolean" label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ" help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/> | |
| 64 <param name="clipping_attribute" type="text" size="4" value="null" label="The attribute that stores the position at which the SAM/BAM record should be clipped" help="CLIPPING_ATTRIBUTE; default=null"/> | |
| 65 <param name="clipping_action" type="text" size="10" value="null" label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region" help="CLIPPING_ACTION; default=null"/> | |
| 66 <param name="read1_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 1" help="READ1_TRIM; default=0"/> | |
| 67 <param name="read1_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 1 after trimming" help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/> | |
| 68 <param name="read2_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 2" help="READ2_TRIM; default=0"/> | |
| 69 <param name="read2_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 2 after trimming" help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/> | |
| 70 <param name="include_non_primary_alignments" type="boolean" label="If true, include non-primary alignments in the output" help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/> | |
| 71 <!-- NOTE(review): the "VS" macro is not defined in this file; presumably it comes from the imported picard_macros.xml and supplies the validation_stringency parameter used by the command - TODO confirm. --> | |
| 72 <expand macro="VS" /> | |
| 73 | |
| 74 </inputs> | |
| 75 | |
| 76 <outputs> | |
| 77 <!-- The hidden "report" dataset is a placeholder written by the command; the visible outputs are the files whose names match the pattern below, with the part before .fastq used as the designation. When OUTPUT_PER_RG=true the tool adds the .fastq extension to emitted files by itself. The angle brackets of the named group are written as entities because a literal less-than sign is not allowed inside an attribute value. --> | |
| 78 <data format="txt" name="report" label="SamToFastq run" hidden="true"> | |
| 79 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/> | |
| 80 </data> | |
| 81 </outputs> | |
| 82 | |
| 83 <tests> <!-- Single case: interleaved output without per-read-group splitting; expects the report to contain the line BAM and one discovered dataset with designation INTERLEAVED. --> | |
| 84 <test> | |
| 85 <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/> | |
| 86 <param name="output_per_rg" value="false"/> | |
| 87 <param name="re_reverse" value="true"/> | |
| 88 <param name="interleave" value="true"/> | |
| 89 <param name="include_non_pf_reads" value="false"/> | |
| 90 <param name="clipping_attribute" value="null" /> | |
| 91 <param name="clipping_action" value="null" /> | |
| 92 <param name="read1_trim" value="0" /> | |
| 93 <param name="read1_max_bases_to_write" value="-1"/> | |
| 94 <param name="read2_trim" value="0" /> | |
| 95 <param name="read2_max_bases_to_write" value="-1"/> | |
| 96 <param name="include_non_primary_alignments" value="false"/> | |
| 97 <output name="report"> | |
| 98 <assert_contents> | |
| 99 <has_line line="BAM" /> | |
| 100 </assert_contents> | |
| 101 <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/> | |
| 102 </output> | |
| 103 </test> | |
| 104 </tests> | |
| 105 | |
| 106 <stdio> <!-- Treat any exit code of 1 or higher as a fatal job error. --> | |
| 107 <exit_code range="1:" level="fatal"/> | |
| 108 </stdio> | |
| 109 | |
| 110 <help> | |
| 111 | |
| 112 **Purpose** | |
| 113 | |
| 114 Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer. | |
| 115 | |
| 116 ----- | |
| 117 | |
| 118 .. class:: warningmark | |
| 119 | |
| 120 **DANGER: Multiple Outputs** | |
| 121 | |
| 122 Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing! | |
| 123 | |
| 124 @dataset_collections@ | |
| 125 | |
| 126 @description@ | |
| 127 | |
| 128 FASTQ=File | |
| 129 F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq). | |
| 130 Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) | |
| 131 | |
| 132 SECOND_END_FASTQ=File | |
| 133 F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null. | |
| 134 Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) | |
| 135 | |
| 136 UNPAIRED_FASTQ=File | |
| 137 FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode. Default | |
| 138 value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG) | |
| 139 | |
| 140 OUTPUT_PER_RG=Boolean | |
| 141 OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is | |
| 142 paired). Default value: false. Possible values: {true, false}. Cannot be used in | |
| 143 conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F) | |
| 144 | |
| 145 OUTPUT_DIR=File | |
| 146 ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true. | |
| 147 Default value: null. | |
| 148 | |
| 149 RE_REVERSE=Boolean | |
| 150 RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them | |
| 151 to fastq. Default value: true. Possible values: {true, false} | |
| 152 | |
| 153 INTERLEAVE=Boolean | |
| 154 INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe | |
| 155 which end it came from. Default value: false. Possible values: {true, false} | |
| 156 | |
| 157 INCLUDE_NON_PF_READS=Boolean | |
| 158 NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes | |
| 159 filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads. | |
| 160 Default value: false. Possible values: {true, false} | |
| 161 | |
| 162 CLIPPING_ATTRIBUTE=String | |
| 163 CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped. Default | |
| 164 value: null. | |
| 165 | |
| 166 CLIPPING_ACTION=String | |
| 167 CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities | |
| 168 should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in | |
| 169 the clipped region; and any integer means that the base qualities should be set to that | |
| 170 value in the clipped region. Default value: null. | |
| 171 | |
| 172 READ1_TRIM=Integer | |
| 173 R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0. | |
| 174 | |
| 175 READ1_MAX_BASES_TO_WRITE=Integer | |
| 176 R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than | |
| 177 this many bases left after trimming, all will be written. If this value is null then all | |
| 178 bases left after trimming will be written. Default value: null. | |
| 179 | |
| 180 READ2_TRIM=Integer | |
| 181 R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0. | |
| 182 | |
| 183 READ2_MAX_BASES_TO_WRITE=Integer | |
| 184 R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than | |
| 185 this many bases left after trimming, all will be written. If this value is null then all | |
| 186 bases left after trimming will be written. Default value: null. | |
| 187 | |
| 188 INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean | |
| 189 If true, include non-primary alignments in the output. Support of non-primary alignments | |
| 190 in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and | |
| 191 there are paired reads with non-primary alignments. Default value: false. | |
| 192 Possible values: {true, false} | |
| 193 | |
| 194 @more_info@ | |
| 195 | |
| 196 </help> | |
| 197 </tool> | |
| 198 | |
| 199 |
